Cafeteria-fixed

Sleeping

App Files Files Community

Cafeteria-fixed / app.py

Kryptone

check changelog tab

cf6afc4 verified over 2 years ago

raw

history blame

20.2 kB

	import gradio as gr, glob, os, auditok, zipfile, wave, pytube.exceptions, librosa, time
	from pytube import YouTube
	from moviepy.editor import VideoFileClip

	def download_video(url, download_as):
	    """Download a YouTube video and save its audio track as ``output.<format>``.

	    Args:
	        url: Address of the YouTube video.
	        download_as: ``"wav"`` or ``"mp3"``; any other value writes no audio file.

	    Returns:
	        A status message telling the user to move on to the next tab.

	    Raises:
	        gr.Error: If pytube rejects ``url`` or the video has no audio track.
	    """
	    try:
	        yt = YouTube(url)
	    except pytube.exceptions.RegexMatchError:
	        raise gr.Error("URL not valid or is empty! Please fix the link or enter one!")
	    video = yt.streams.get_highest_resolution()
	    video.download()
	    video_path = f"{video.default_filename}"

	    # Remove audio left by an earlier download: the splitter picks output.mp3
	    # whenever it exists, so a stale file would be split instead of this one.
	    for stale_output in ("output.wav", "output.mp3"):
	        if os.path.exists(stale_output):
	            os.remove(stale_output)

	    video_clip = VideoFileClip(video_path)
	    try:
	        audio_clip = video_clip.audio
	        if audio_clip is None:
	            raise gr.Error("The downloaded video has no audio track.")
	        try:
	            if download_as == "wav":
	                audio_clip.write_audiofile("output.wav")
	            elif download_as == "mp3":
	                audio_clip.write_audiofile("output.mp3")
	        finally:
	            audio_clip.close()
	    finally:
	        # Close before deleting so the video file is no longer held open.
	        video_clip.close()
	        for removalmp4 in glob.glob("*.mp4"):
	            os.remove(removalmp4)
	    return "Finished downloading! Please proceed to next tab."

	def split_audio_from_yt_video(mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur):
	    """Split the downloaded ``output.mp3``/``output.wav`` into WAV segments and zip them.

	    Args:
	        mindur: Minimum segment duration passed to ``auditok.split``.
	        maxdur: Maximum segment duration passed to ``auditok.split``.
	        name_for_split_files: Prefix of the segment files (``<prefix>-<n>.wav``).
	        show_amount_of_files_and_file_dur: When truthy, the status message also
	            reports how many segments were written and their total length.

	    Returns:
	        A ``(status message, zip file name)`` tuple.

	    Raises:
	        gr.Error: If no downloaded audio exists, the durations are equal or
	            inverted, the prefix is empty, no segment was produced, or a
	            segment cannot be read back as WAV.
	    """
	    if show_amount_of_files_and_file_dur:
	        gr.Warning(f"show_amount_of_files_and_file_dur set to True. This will take longer if your audio file is long.")
	    if not os.path.exists("output.mp3") and not os.path.exists("output.wav"):
	        raise gr.Error("Neither output.mp3 or output.wav exist! Did the video download correctly?")
	    if mindur == maxdur:
	        raise gr.Error(f"Cannot split mindur={mindur} and maxdur={maxdur}, min and max are the same number.")
	    if mindur > maxdur:
	        raise gr.Error(f"Cannot split mindur={mindur} and maxdur={maxdur}, mindur is higher than maxdur.")
	    # An untouched textbox presumably yields "" rather than None, so test for
	    # emptiness instead of identity with None.
	    if not name_for_split_files:
	        raise gr.Error("Split files name cannot be empty!")

	    audio_path = "output.wav" if not os.path.exists("output.mp3") else "output.mp3"
	    audio_regions = auditok.split(
	        audio_path,
	        min_dur=mindur,
	        max_dur=maxdur,
	        max_silence=0.3,
	        energy_threshold=45
	    )
	    zip_file_name = "audio_files.zip"
	    try:
	        # Keep the names returned by save() so only this run's segments are
	        # zipped and counted, not every WAV that happens to be in the folder.
	        saved_files = [
	            r.save(f"{name_for_split_files}-{i}.wav")
	            for i, r in enumerate(audio_regions, start=1)
	        ]
	        # NOTE(review): regions may be produced lazily, so the source file is
	        # removed only after every segment has been written.
	        os.remove(audio_path)
	        if not saved_files:
	            raise gr.Error("No audio segments were detected, so there is nothing to zip.")

	        with zipfile.ZipFile(zip_file_name, "w") as zip_file:
	            for audio_file in saved_files:
	                zip_file.write(audio_file, os.path.basename(audio_file))

	        message = "Files split successfully!\nCheck below for zipped files."
	        if show_amount_of_files_and_file_dur:
	            total_files = 0
	            total_length = 0.0
	            for audio_file in saved_files:
	                try:
	                    with wave.open(audio_file, 'r') as wav_file:
	                        total_length += wav_file.getnframes() / float(wav_file.getframerate())
	                        total_files += 1
	                except wave.Error as e:
	                    raise gr.Error(f"Error reading file: {e}")
	            length_mins = total_length / 60
	            message += f"\n\n{total_files} files created, {length_mins:.2f} minutes total."
	    finally:
	        # Always clear the working folder of WAVs, even when a step above failed.
	        for file2 in glob.glob("*.wav"):
	            os.remove(file2)
	    return message, zip_file_name

	def analyze_audio(zip_file_path):
	    """Report the average sample rate of the WAV files at the top level of a zip.

	    Args:
	        zip_file_path: Zip archive to inspect, passed straight to ``zipfile.ZipFile``.

	    Returns:
	        ``"Average sample rate: <value>"`` when at least one WAV file could be
	        read, otherwise ``"No average sample rate could be found."``.
	    """
	    import tempfile  # local import: the file's import line does not provide it

	    total_sample_rate = 0
	    total_files = 0
	    # Extract into a throwaway directory so files from earlier uploads cannot
	    # leak into this average (a fixed folder would keep accumulating them).
	    with tempfile.TemporaryDirectory() as extract_dir:
	        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
	            zip_ref.extractall(extract_dir)
	        for file_name in os.listdir(extract_dir):
	            if not file_name.lower().endswith('.wav'):
	                continue
	            file_path = os.path.join(extract_dir, file_name)
	            try:
	                with wave.open(file_path, 'r') as audio_file:
	                    total_sample_rate += audio_file.getframerate()
	                    total_files += 1
	            except wave.Error as e:
	                # Unreadable files are skipped; the rest still count.
	                print(f"Error reading file: {e}")
	    if total_files > 0:
	        average_sample_rate = total_sample_rate / total_files
	        return f"Average sample rate: {average_sample_rate}"
	    return "No average sample rate could be found."

	def split_wav_or_mp3_file(audiofileuploader, mindur2, maxdur2, name_for_split_files2):
	    """Split one uploaded WAV/MP3 file into WAV segments and zip them.

	    Args:
	        audiofileuploader: The uploaded audio file, or ``None`` when nothing
	            was uploaded.
	        mindur2: Minimum segment duration passed to ``auditok.split``.
	        maxdur2: Maximum segment duration passed to ``auditok.split``.
	        name_for_split_files2: Prefix of the segment files (``<prefix>-<n>.wav``).

	    Returns:
	        A ``(status message, zip file name)`` tuple; the message includes the
	        number of segments written.

	    Raises:
	        gr.Error: If no file was uploaded, the durations are equal or
	            inverted, the prefix is empty, or no segment was produced.
	    """
	    if audiofileuploader is None:
	        raise gr.Error("Audio file cannot be empty!")
	    # Report this function's own arguments; the bare names mindur/maxdur are
	    # module-level UI components, not numbers.
	    if mindur2 == maxdur2:
	        raise gr.Error(f"Cannot split mindur={mindur2} and maxdur={maxdur2}, min and max are the same number.")
	    if mindur2 > maxdur2:
	        raise gr.Error(f"Cannot split mindur={mindur2} and maxdur={maxdur2}, mindur is higher than maxdur.")
	    # An untouched textbox presumably yields "" rather than None.
	    if not name_for_split_files2:
	        raise gr.Error("Split files name cannot be empty!")

	    # NOTE(review): depending on the Gradio version the upload may be a path
	    # string or a temp-file object exposing .name -- confirm; both are handled.
	    audio_path = getattr(audiofileuploader, "name", audiofileuploader)
	    audio_regions = auditok.split(
	        audio_path,
	        min_dur=mindur2,
	        max_dur=maxdur2,
	        max_silence=0.3,
	        energy_threshold=45
	    )
	    zip_file_name2 = "audio_files.zip"
	    try:
	        # Keep the names returned by save() so only this run's segments are
	        # zipped and counted, not every WAV that happens to be in the folder.
	        saved_files = [
	            r.save(f"{name_for_split_files2}-{i}.wav")
	            for i, r in enumerate(audio_regions, start=1)
	        ]
	        # NOTE(review): regions may be produced lazily, so the upload is
	        # removed only after every segment has been written.
	        os.remove(audio_path)
	        if not saved_files:
	            raise gr.Error("No audio segments were detected, so there is nothing to zip.")
	        with zipfile.ZipFile(zip_file_name2, "w") as zip_file:
	            for audio_file in saved_files:
	                zip_file.write(audio_file, os.path.basename(audio_file))
	    finally:
	        # Always clear the working folder of WAVs, even when a step above failed.
	        for file2 in glob.glob("*.wav"):
	            os.remove(file2)
	    return f"File split successfully!\nCheck below for zipped files.\nAmount created: {len(saved_files)}", zip_file_name2

	def get_average_pitch(audio_file):
	    """Estimate the average pitch of an audio file.

	    For every analysis frame the frequency of the strongest spectral peak is
	    taken from ``librosa.piptrack``; frames without a detected peak are
	    ignored and the remaining values are averaged.

	    Args:
	        audio_file: The uploaded audio file, or ``None`` when nothing was uploaded.

	    Returns:
	        ``"Average pitch: <value> Hz"`` with two decimals.

	    Raises:
	        gr.Error: If no file was given or no pitch could be detected.
	    """
	    import numpy as np  # local import: the file's import line does not provide it

	    if audio_file is None:
	        raise gr.Error("Audio file cannot be empty!")
	    # NOTE(review): depending on the Gradio version the upload may be a path
	    # string or a temp-file object exposing .name -- confirm; both are handled.
	    audio_path = getattr(audio_file, "name", audio_file)
	    y, sr = librosa.load(audio_path, sr=None)
	    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
	    # piptrack's matrices are zero at every non-peak bin, so averaging the
	    # whole matrix is dominated by zeros. Take the strongest peak per frame.
	    strongest_bin = magnitudes.argmax(axis=0)
	    frame_pitches = pitches[strongest_bin, np.arange(pitches.shape[1])]
	    voiced = frame_pitches[frame_pitches > 0]
	    if voiced.size == 0:
	        raise gr.Error("No pitch could be detected in this audio file.")
	    mean_pitch = voiced.mean()
	    return f"Average pitch: {mean_pitch:.2f} Hz"

	def all_in_one_inator(ytvideo, download_yt_video_as, min_duration, max_duration, name_for_outputted_split_files, progress=gr.Progress()):
	    """Download a YouTube video, extract its audio, split it and zip the segments.

	    Args:
	        ytvideo: Address of the YouTube video.
	        download_yt_video_as: ``"wav"`` or ``"mp3"``, the intermediate audio format.
	        min_duration: Minimum segment duration passed to ``auditok.split``.
	        max_duration: Maximum segment duration passed to ``auditok.split``.
	        name_for_outputted_split_files: Prefix of the segment files
	            (``<prefix>-<n>.wav``).
	        progress: Gradio progress tracker, updated at the start, after the
	            download and at the end.

	    Returns:
	        A ``(status message, zip file name)`` tuple.

	    Raises:
	        gr.Error: If the durations are equal or inverted, the prefix is empty,
	            pytube rejects the URL, the video has no audio track, no audio
	            file was written, or no segment was produced.
	    """
	    # Check the argument, not the unrelated module-level `download_as` widget.
	    if download_yt_video_as == "mp3":
	        gr.Warning("MP3 is experimental, especially with this, so caution is advised.")
	    if min_duration == max_duration:
	        raise gr.Error(f"Cannot split mindur={min_duration} and maxdur={max_duration}, min and max are the same number.")
	    if min_duration > max_duration:
	        raise gr.Error(f"Cannot split mindur={min_duration} and maxdur={max_duration}, mindur is higher than maxdur.")
	    # An untouched textbox presumably yields "" rather than None.
	    if not name_for_outputted_split_files:
	        raise gr.Error("Split files name cannot be empty!")

	    try:
	        progress(0, "Downloading video...")
	        yt = YouTube(ytvideo)
	    except pytube.exceptions.RegexMatchError:
	        raise gr.Error("URL not valid or was left empty! Please fix the link or enter one.")
	    video = yt.streams.get_highest_resolution()
	    video.download()
	    video_path = f"{video.default_filename}"

	    # Remove audio left by an earlier run so the file picked below is
	    # guaranteed to be the one written by this call.
	    for stale_output in ("output.wav", "output.mp3"):
	        if os.path.exists(stale_output):
	            os.remove(stale_output)

	    video_clip = VideoFileClip(video_path)
	    try:
	        audio_clip = video_clip.audio
	        if audio_clip is None:
	            raise gr.Error("The downloaded video has no audio track.")
	        try:
	            if download_yt_video_as == "wav":
	                audio_clip.write_audiofile("output.wav")
	            elif download_yt_video_as == "mp3":
	                audio_clip.write_audiofile("output.mp3")
	        finally:
	            audio_clip.close()
	    finally:
	        # Close before deleting so the video file is no longer held open.
	        video_clip.close()
	        for removemp4 in glob.glob("*.mp4"):
	            os.remove(removemp4)

	    progress(0.5, "Video downloaded! Starting split process...")
	    audio_path = "output.wav" if not os.path.exists("output.mp3") else "output.mp3"
	    if not os.path.exists(audio_path):
	        raise gr.Error("Neither output.mp3 or output.wav exist! Did the video download correctly?")
	    audio_regions = auditok.split(
	        audio_path,
	        min_dur=min_duration,
	        max_dur=max_duration,
	        max_silence=0.3,
	        energy_threshold=45
	    )
	    zip_file_name = "audio_files.zip"
	    try:
	        # Keep the names returned by save() so only this run's segments are
	        # zipped, not every WAV that happens to be in the folder.
	        saved_files = [
	            r.save(f"{name_for_outputted_split_files}-{i}.wav")
	            for i, r in enumerate(audio_regions, start=1)
	        ]
	        # NOTE(review): regions may be produced lazily, so the source file is
	        # removed only after every segment has been written.
	        os.remove(audio_path)
	        if not saved_files:
	            raise gr.Error("No audio segments were detected, so there is nothing to zip.")
	        with zipfile.ZipFile(zip_file_name, 'w') as zip_file:
	            for audio_file in saved_files:
	                zip_file.write(audio_file, os.path.basename(audio_file))
	    finally:
	        # Always clear the working folder of WAVs, even when a step above failed.
	        for file2 in glob.glob("*.wav"):
	            os.remove(file2)
	    progress(1, "Done! Cleaning up...")
	    time.sleep(2)
	    return "Process done successfully! Check below for zipped files!", zip_file_name

	def download_video_as_audio_only(yt_video, audio_output_format):
	    """Download a YouTube video and return its audio track inside a zip file.

	    Args:
	        yt_video: Address of the YouTube video.
	        audio_output_format: ``"wav"`` or ``"mp3"``; any other value writes no
	            audio file, which leaves the zip empty.

	    Returns:
	        A ``(status message, zip file name)`` tuple.

	    Raises:
	        gr.Error: If pytube rejects the URL or the video has no audio track.
	    """
	    try:
	        yt = YouTube(yt_video)
	    except pytube.exceptions.RegexMatchError:
	        raise gr.Error("URL not valid or is empty! Please fix the link or enter one!")
	    video = yt.streams.get_highest_resolution()
	    video.download()
	    video_path = f"{video.default_filename}"

	    # Remove audio left by an earlier run so only this download gets zipped.
	    for stale_output in ("output.wav", "output.mp3"):
	        if os.path.exists(stale_output):
	            os.remove(stale_output)

	    video_clip = VideoFileClip(video_path)
	    try:
	        audio_clip = video_clip.audio
	        if audio_clip is None:
	            raise gr.Error("The downloaded video has no audio track.")
	        try:
	            if audio_output_format == "wav":
	                audio_clip.write_audiofile("output.wav")
	            elif audio_output_format == "mp3":
	                audio_clip.write_audiofile("output.mp3")
	        finally:
	            audio_clip.close()
	    finally:
	        # Close before deleting so the video file is no longer held open.
	        video_clip.close()
	        for mp4remove in glob.glob("*.mp4"):
	            os.remove(mp4remove)

	    single_zip_name = "only_audio.zip"
	    # The patterns need the "output" stem: a bare ".wav"/".mp3" only matches a
	    # file with exactly that name, which left the archive empty.
	    audio_files = glob.glob("output.wav") if audio_output_format == "wav" else glob.glob("output.mp3")
	    with zipfile.ZipFile(single_zip_name, 'w') as zip_file:
	        for audio_file in audio_files:
	            zip_file.write(audio_file, os.path.basename(audio_file))
	    # The audio now lives in the zip; clear every loose WAV/MP3 in the folder.
	    for leftover in glob.glob("*.wav") + glob.glob("*.mp3"):
	        if os.path.exists(leftover):
	            os.remove(leftover)
	    return "Done! Download the zip file below! This only contains the audio file.", single_zip_name

	def check_for_remaining_wav_or_mp3_files(which_filetype):
	    """Count the files with the given extension in the current working directory.

	    Args:
	        which_filetype: File extension without the dot, e.g. ``"wav"`` or ``"mp3"``.

	    Returns:
	        A sentence stating how many matching files were found.
	    """
	    pattern = "*.{}".format(which_filetype)
	    leftover_count = len(glob.glob(pattern))
	    return "There are {} leftover files.".format(leftover_count)

	# Gradio UI: a download tab, a split tab, a "Misc tools" tab holding the extra
	# utilities as sub-tabs, and a changelog tab. The app is launched at the end.
	with gr.Blocks(theme="sudeepshouche/minimalist", title="Global Dataset Maker") as app:
	    # Page header and "duplicate this Space" badge.
	    gr.HTML(
	        "<h1> Welcome to the GDMGS! (GlobalDatasetMaker Gradio Space) </h1>"
	    )
	    gr.Markdown("## Duplicate this space if you want to make your own changes!")
	    gr.HTML(
	        """<p style="margin:5px auto;display: flex;justify-content: left;">
	<a href="https://huggingface.co/spaces/Kryptone/GDMGS?duplicate=true"><img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-md-dark.svg" alt="Duplicate this Space"></a>
	</p>"""
	    )
	    gr.Markdown(
	        "This Space will create a dataset for you, all automatically. Please be warned that due to not having a GPU on this Space, some steps might take longer to complete."
	    )
	    with gr.Tabs():
	        # Step 1: fetch a video and keep its audio as output.wav / output.mp3.
	        with gr.TabItem("Download Video"):
	            with gr.Row():
	                with gr.Column():
	                    with gr.Row():
	                        url = gr.Textbox(label="URL")
	                        download_as = gr.Radio(["wav", "mp3"], label="Audio format output", value="wav", info="What should the audio format be output as?")
	                    convertion = gr.Button("Download", variant="primary")
	                    convertion.click(
	                        fn=download_video,
	                        inputs=[url, download_as],
	                        outputs=gr.Text(label="Output"),
	                    )
	        # Step 2: split the downloaded audio into segments.
	        with gr.TabItem("Split audio files"):
	            with gr.Row():
	                with gr.Column():
	                    with gr.Row():
	                        mindur = gr.Number(label="Min duration", minimum=1, maximum=10, value=1)
	                        maxdur = gr.Number(label="Max duration", minimum=1, maximum=10, value=5)
	                        name_for_split_files = gr.Textbox(label="Name for split files")
	                        show_amount_of_files_and_file_dur = gr.Checkbox(False, label="Show total amount of files and duration?")
	                    splitbtn = gr.Button("Split", variant="primary")
	                    splitbtn.click(
	                        fn=split_audio_from_yt_video,
	                        inputs=[mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur],
	                        outputs=[gr.Text(label="Output"), gr.File(label="Zipped files")],
	                    )
	        # Extra utilities, one sub-tab each.
	        with gr.TabItem("Misc tools"):
	            with gr.Tab("SR analyzer"):
	                gr.Markdown("Upload a zip file of your wavs here and this will determine the average sample rate.")
	                with gr.Row():
	                    with gr.Column():
	                        with gr.Row():
	                            zipuploader = gr.File(file_count="single", file_types=[".zip"], label="ZIP file")
	                        uploadbtn = gr.Button("Analyze", variant="primary")
	                        uploadbtn.click(
	                            fn=analyze_audio,
	                            inputs=[zipuploader],
	                            outputs=[gr.Text(label="Result")],
	                        )
	            with gr.Tab("File splitter"):
	                gr.Markdown("If you would rather split a single WAV or mp3 audio file, use this method instead.")
	                with gr.Row():
	                    with gr.Column():
	                        with gr.Row():
	                            audiofileuploader = gr.File(file_count="single", file_types=[".wav", ".mp3"], label="WAV or mp3 file")
	                            mindur2 = gr.Number(label="Min duration", minimum=1, maximum=10, value=1)
	                            maxdur2 = gr.Number(label="Max duration", minimum=1, maximum=10, value=5)
	                            name_for_split_files2 = gr.Textbox(label="Name for split files")
	                        audiofileuploadbtn = gr.Button("Split", variant="primary")
	                        audiofileuploadbtn.click(
	                            fn=split_wav_or_mp3_file,
	                            inputs=[audiofileuploader, mindur2, maxdur2, name_for_split_files2],
	                            outputs=[gr.Text(label="Output"), gr.File(label="Zipped files")],
	                        )
	            with gr.Tab("Pitch analyzer"):
	                gr.Markdown("Upload a wav file here, and this will determine the average pitch.")
	                gr.HTML("<h1> Zip files and MP3 files are not supported as of now.")
	                with gr.Row():
	                    with gr.Column():
	                        with gr.Row():
	                            upload = gr.File(file_count="single", file_types=[".wav"], label="WAV file")
	                        analyze = gr.Button("Analyze", variant="primary")
	                        analyze.click(
	                            fn=get_average_pitch,
	                            inputs=[upload],
	                            outputs=[gr.Text(label="Result")],
	                        )
	            with gr.Tab("All-in-one downloader and splitter"):
	                gr.Markdown("This is very experimental and may break or change in the future. This essentially combines both the first 2 tabs into an all-in-one script.")
	                with gr.Row():
	                    with gr.Column():
	                        with gr.Row():
	                            ytvideo = gr.Textbox(label="URL")
	                            download_yt_video_as = gr.Radio(["wav", "mp3"], value="wav", label="Audio output format")
	                            min_duration = gr.Number(label="Min duration", minimum=1, maximum=10, value=1)
	                            max_duration = gr.Number(label="Max duration", minimum=1, maximum=10, value=5)
	                            name_for_outputted_split_files = gr.Textbox(label="Name for split files")
	                        download_and_split_btn = gr.Button("Download and split", variant="primary")
	                        download_and_split_btn.click(
	                            fn=all_in_one_inator,
	                            inputs=[ytvideo, download_yt_video_as, min_duration, max_duration, name_for_outputted_split_files],
	                            outputs=[gr.Text(label="Result"), gr.File(label="Zipped files")],
	                        )
	            with gr.Tab("Audio only download"):
	                gr.Markdown("If you want to download only the audio (to isolate bgm using UVR, etc), use this method, which will only extract audio and not split the audio.")
	                with gr.Row():
	                    with gr.Column():
	                        with gr.Row():
	                            yt_video = gr.Textbox(label="URL")
	                            audio_output_format = gr.Radio(["wav", "mp3"], value="wav", label="Download audio as:")
	                        commence_download = gr.Button("Download", variant="primary")
	                        commence_download.click(
	                            fn=download_video_as_audio_only,
	                            inputs=[yt_video, audio_output_format],
	                            outputs=[gr.Text(label="Output"), gr.File(label="Zipped audio file")],
	                        )
	            with gr.Tab("Check for leftover mp3 or wav files"):
	                gr.Markdown("There might be instances where sometimes a few wav or mp3 files are left over after a conversion. This section tells how many of those files are left, if any.")
	                with gr.Row():
	                    with gr.Column():
	                        with gr.Row():
	                            which_filetype = gr.Radio(["wav", "mp3"], value="wav", label="Search for what filetype?")
	                        checkbtn = gr.Button("Check for files", variant="primary")
	                        checkbtn.click(
	                            fn=check_for_remaining_wav_or_mp3_files,
	                            inputs=which_filetype,
	                            outputs=gr.Text(label="Result"),
	                        )
	        # Release notes, newest first; each entry is rendered as its own Markdown block.
	        with gr.TabItem("Changelog"):
	            for changelog_entry in (
	                "v0.96 - Added new remaining files tool in Misc Tools.",
	                "v0.95 - Fixed issue with mp3 files not downloading audio properly.",
	                "v0.94a - Fixed issue with existing output.wav or output.mp3 files clashing with the split audio files with addition of the new tool.",
	                "v0.94 - Added new tool: YouTube-to-audio.",
	                "v0.93 - Removed obsolete warnings and fixed issue with all-in-one if output.mp3 or output.wav doesnt exist.",
	                "v0.92 - Added all-in-one tab under Misc Tools.",
	                "v0.91 - Added mp3 file support for single file splitting, and also fixed bug if neither output.wav or output.mp3 exists.",
	                "v0.90a - Fixed bug that if 'show_amount_of_files_and_file_dur' was False, split wav files would not be deleted.",
	                "v0.90 - Added mp3 support for downloading a Youtube video.",
	                "v0.85 - Fixed bug in pitch analyzer if no audio file was given.",
	                "v0.80 - Added new tool: Pitch Analyzer.",
	                "v0.75 - Fixed bug that would cause split wav files to be packaged with the previously split wav files.",
	                "v0.74 - Added new tool: WAV file splitter.",
	                "v0.73 - Added Misc Tools tab and new Sample Rate analyzer tool.",
	                "v0.70 - Fixed bug if no URL was passed or if the URL was invalid.",
	                "v0.65 - Fixed bug if user tried to split an audio file when 'output.wav' didnt exist.",
	                "v0.60 - Initial push to Huggingface Space.",
	            ):
	                gr.Markdown(changelog_entry)

	app.launch()