# YouTube Audio Feature Analysis — Gradio app
# (removed "Spaces: Sleeping" status-banner text left over from a web scrape)
import logging
import shutil
import tempfile
import time
from pathlib import Path

import ffmpeg
import gradio as gr
import librosa
import numpy as np
import yt_dlp
from PIL import Image

# Set up logging for debugging
logging.basicConfig(level=logging.DEBUG)
def analyze_audio(youtube_url, input_text, input_image=None, slider_value=50, checkbox_value=False):
    """
    Download YouTube audio, run librosa feature analysis, and process text/image inputs.

    Automatically handles per-call file and folder management.

    Args:
        youtube_url (str): YouTube video URL (optional; falsy values skip the download).
        input_text (str): Text input echoed into the results summary.
        input_image (PIL.Image, optional): Image to brighten, if provided.
        slider_value (float): Numerical parameter (analysis threshold), 0-100.
        checkbox_value (bool): Toggle for enhanced-analysis messaging.

    Returns:
        tuple: (processed_text, output_image_display, output_audio, extra_info)
    """
    # Initialized before the try so the finally block can safely inspect it even
    # when an exception fires before the download branch runs.
    output_audio = None
    # Unique scratch directory per call so concurrent requests cannot collide.
    temp_dir = Path(tempfile.mkdtemp(prefix="audio_analysis_"))
    output_dir = temp_dir / "downloaded_media"
    output_dir.mkdir(parents=True, exist_ok=True)
    logging.debug(f"Created temporary directory: {temp_dir}, output directory: {output_dir}")
    try:
        # Initialize outputs.
        processed_text = f"Processed: '{input_text}'."
        output_image_display = input_image
        extra_info = f"Threshold: {slider_value/100:.2f}"

        # Handle YouTube download if a URL was provided.
        if youtube_url:
            try:
                # Cheap validation before handing the URL to yt-dlp.
                if not youtube_url.startswith(("https://www.youtube.com/", "https://youtu.be/")):
                    return "Error: Invalid YouTube URL", None, None, "Processing failed."
                # YouTube download options (audio only, extracted to mp3).
                ydl_opts = {
                    'format': 'bestaudio/best',
                    'outtmpl': str(output_dir / '%(title)s.%(ext)s'),
                    'postprocessors': [{
                        'key': 'FFmpegExtractAudio',
                        'preferredcodec': 'mp3',
                        'preferredquality': '192',
                    }],
                    'restrictfilenames': True,
                    'noplaylist': True,
                }
                with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                    info = ydl.extract_info(youtube_url, download=True)
                    # BUGFIX: with restrictfilenames=True the on-disk name is a
                    # *sanitized* version of the title, so building the path from
                    # info['title'] broke for any title containing spaces or
                    # special characters. prepare_filename() yields the sanitized
                    # pre-postprocessor path; FFmpegExtractAudio only swaps the
                    # extension, so substituting .mp3 gives the real file.
                    audio_file = Path(ydl.prepare_filename(info)).with_suffix('.mp3')
                logging.debug(f"Downloaded audio: {audio_file}")
                output_audio = str(audio_file)

                # Automatic audio feature analysis with librosa.
                y, sr = librosa.load(audio_file)
                hop_length = 512  # librosa's default; 0 would raise "Invalid hop_length"
                logging.debug(f"Using hop_length: {hop_length}")
                mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, hop_length=hop_length)
                spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr, hop_length=hop_length)
                tempo, _ = librosa.beat.beat_track(y=y, sr=sr, hop_length=hop_length)
                # BUGFIX: librosa >= 0.10 returns tempo as an ndarray; coerce to a
                # plain float so the :.2f format spec below cannot raise.
                tempo = float(np.atleast_1d(tempo)[0])
                # Aggregate features into a human-readable summary.
                mfcc_mean = np.mean(mfcc, axis=1).tolist()[:3]  # mean of first 3 MFCC coefficients
                spectral_centroid_mean = np.mean(spectral_centroid)
                features_summary = (
                    f"Audio Features: MFCC (mean of first 3 coeffs): {mfcc_mean}, "
                    f"Spectral Centroid: {spectral_centroid_mean:.2f} Hz, "
                    f"Tempo: {tempo:.2f} BPM"
                )
                processed_text += f" {features_summary}."
                extra_info += f", Audio: {audio_file.name}"
            except Exception as e:
                # Keep the app responsive: report the failure in the text output
                # instead of failing the whole request.
                logging.error(f"YouTube download or audio processing error: {str(e)}")
                processed_text += f" Error processing YouTube audio: {str(e)}."

        # Optional image processing: brighten by 50%.
        if input_image is not None:
            from PIL import ImageEnhance
            enhancer = ImageEnhance.Brightness(input_image)
            output_image_display = enhancer.enhance(1.5)
            processed_text += " Image processed (brightened)."
        else:
            processed_text += " No image provided."

        # Fold the slider/checkbox settings into the summary text.
        processed_text += f" Slider: {slider_value}, Enhanced Analysis: {checkbox_value}."
        if checkbox_value:
            processed_text += " Enhanced analysis enabled."
            if youtube_url and slider_value > 50:
                processed_text += f" High threshold ({slider_value}) applied for deeper analysis."
        return processed_text, output_image_display, output_audio, extra_info
    except Exception as e:
        logging.error(f"Error in analyze_audio: {str(e)}")
        return f"Error: {str(e)}", None, None, "Processing failed."
    finally:
        # BUGFIX: this finally block runs *before* the caller ever sees the return
        # value, so unconditionally rmtree-ing temp_dir deleted the mp3 Gradio was
        # about to serve (the old time.sleep(1) could not change that ordering).
        # Only clean up when no audio file is being returned; otherwise leave the
        # scratch directory for the OS temp reaper.
        try:
            if output_audio is None and temp_dir.exists():
                shutil.rmtree(temp_dir)
                logging.debug(f"Cleaned up temporary directory: {temp_dir}")
        except Exception as e:
            logging.error(f"Error cleaning up temporary directory: {str(e)}")
# --- Input components -------------------------------------------------------
input_youtube_url = gr.Textbox(
    label="YouTube Video URL",
    placeholder="e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ",
    info="Optional: Enter a YouTube URL to download and analyze audio.",
)
input_text_component = gr.Textbox(
    label="Input Text",
    placeholder="e.g., Analyze this audio track",
    info="Type a description or query for processing.",
)
# BUGFIX: gr.Image, gr.Audio, and gr.Label do not accept an `info` kwarg
# (only form components such as Textbox/Slider/Checkbox do), so passing it
# raised TypeError at startup. The dropped hint texts are preserved as
# comments below — confirm against the pinned Gradio version.
input_image_component = gr.Image(
    type="pil",
    label="Upload Image (Optional)",
    sources=["upload", "webcam", "clipboard"],
)  # hint: "Optionally upload an image for processing."
input_slider_component = gr.Slider(
    minimum=0,
    maximum=100,
    value=50,
    step=1,
    label="Analysis Threshold",
    info="Adjusts sensitivity of audio feature analysis.",
)
input_checkbox_component = gr.Checkbox(
    label="Enable Enhanced Analysis",
    info="Toggle for deeper audio feature extraction.",
)

# --- Output components ------------------------------------------------------
output_text_component = gr.Textbox(
    label="Analysis Results",
    info="Text results including audio feature analysis.",
)
output_image_component = gr.Image(
    label="Processed Image (if any)",
)  # hint: "Processed image output (if provided)."
output_audio_component = gr.Audio(
    label="Downloaded Audio",
    type="filepath",
)  # hint: "Audio downloaded from YouTube."
output_label_component = gr.Label(
    label="Analysis Summary",
)  # hint: "Feature analysis details and processing info."
# Create the Gradio interface wiring the five inputs to the four outputs
# (order must match analyze_audio's parameters and return tuple).
iface = gr.Interface(
    fn=analyze_audio,
    inputs=[
        input_youtube_url,
        input_text_component,
        input_image_component,
        input_slider_component,
        input_checkbox_component
    ],
    outputs=[
        output_text_component,
        output_image_component,
        output_audio_component,
        output_label_component
    ],
    title="YouTube Audio Feature Analysis",
    description="Download YouTube audio, analyze features with librosa, and process text/image inputs. Customize with slider and checkbox.",
    # Example rows: [url, text, image, slider, checkbox]; clicking one invokes fn.
    examples=[
        ["https://www.youtube.com/watch?v=dQw4w9WgXcQ", "Analyze this track", None, 75, True],
        [None, "Describe a music track", None, 30, False],
        ["https://www.youtube.com/watch?v=9bZkp7q19f0", "Extract audio features", None, 60, True]
    ],
    # NOTE(review): allow_flagging is deprecated in newer Gradio releases in
    # favor of flagging_mode — confirm against the pinned Gradio version.
    allow_flagging="never",
    theme=gr.themes.Soft()
)
# Launch only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()