# YouTube Audio Feature Analysis — Gradio app
# (removed "Spaces: Sleeping" status-banner text left over from a web scrape)
import logging
import shutil
import tempfile
import time
from pathlib import Path

import ffmpeg
import gradio as gr
import librosa
import numpy as np
import yt_dlp
from PIL import Image

# Set up logging for debugging
logging.basicConfig(level=logging.DEBUG)
def analyze_audio(youtube_url, input_text, input_image=None, slider_value=50, checkbox_value=False):
    """
    Download YouTube audio, run librosa feature analysis, and process text/image inputs.

    Automatically handles per-call file and folder management.

    Args:
        youtube_url (str): YouTube video URL (optional; falsy values skip the download).
        input_text (str): Text input echoed into the results summary.
        input_image (PIL.Image, optional): Image to brighten, if provided.
        slider_value (float): Numerical parameter (analysis threshold), 0-100.
        checkbox_value (bool): Toggle for enhanced-analysis messaging.

    Returns:
        tuple: (processed_text, output_image_display, output_audio, extra_info)
    """
    # Initialized before the try so the finally block can safely inspect it even
    # when an exception fires before the download branch runs.
    output_audio = None
    # Unique scratch directory per call so concurrent requests cannot collide.
    temp_dir = Path(tempfile.mkdtemp(prefix="audio_analysis_"))
    output_dir = temp_dir / "downloaded_media"
    output_dir.mkdir(parents=True, exist_ok=True)
    logging.debug(f"Created temporary directory: {temp_dir}, output directory: {output_dir}")
    try:
        # Initialize outputs.
        processed_text = f"Processed: '{input_text}'."
        output_image_display = input_image
        extra_info = f"Threshold: {slider_value/100:.2f}"

        # Handle YouTube download if a URL was provided.
        if youtube_url:
            try:
                # Cheap validation before handing the URL to yt-dlp.
                if not youtube_url.startswith(("https://www.youtube.com/", "https://youtu.be/")):
                    return "Error: Invalid YouTube URL", None, None, "Processing failed."
                # YouTube download options (audio only, extracted to mp3).
                ydl_opts = {
                    'format': 'bestaudio/best',
                    'outtmpl': str(output_dir / '%(title)s.%(ext)s'),
                    'postprocessors': [{
                        'key': 'FFmpegExtractAudio',
                        'preferredcodec': 'mp3',
                        'preferredquality': '192',
                    }],
                    'restrictfilenames': True,
                    'noplaylist': True,
                }
                with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                    info = ydl.extract_info(youtube_url, download=True)
                    # BUGFIX: with restrictfilenames=True the on-disk name is a
                    # *sanitized* version of the title, so building the path from
                    # info['title'] broke for any title containing spaces or
                    # special characters. prepare_filename() yields the sanitized
                    # pre-postprocessor path; FFmpegExtractAudio only swaps the
                    # extension, so substituting .mp3 gives the real file.
                    audio_file = Path(ydl.prepare_filename(info)).with_suffix('.mp3')
                logging.debug(f"Downloaded audio: {audio_file}")
                output_audio = str(audio_file)

                # Automatic audio feature analysis with librosa.
                y, sr = librosa.load(audio_file)
                hop_length = 512  # librosa's default; 0 would raise "Invalid hop_length"
                logging.debug(f"Using hop_length: {hop_length}")
                mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, hop_length=hop_length)
                spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr, hop_length=hop_length)
                tempo, _ = librosa.beat.beat_track(y=y, sr=sr, hop_length=hop_length)
                # BUGFIX: librosa >= 0.10 returns tempo as an ndarray; coerce to a
                # plain float so the :.2f format spec below cannot raise.
                tempo = float(np.atleast_1d(tempo)[0])
                # Aggregate features into a human-readable summary.
                mfcc_mean = np.mean(mfcc, axis=1).tolist()[:3]  # mean of first 3 MFCC coefficients
                spectral_centroid_mean = np.mean(spectral_centroid)
                features_summary = (
                    f"Audio Features: MFCC (mean of first 3 coeffs): {mfcc_mean}, "
                    f"Spectral Centroid: {spectral_centroid_mean:.2f} Hz, "
                    f"Tempo: {tempo:.2f} BPM"
                )
                processed_text += f" {features_summary}."
                extra_info += f", Audio: {audio_file.name}"
            except Exception as e:
                # Keep the app responsive: report the failure in the text output
                # instead of failing the whole request.
                logging.error(f"YouTube download or audio processing error: {str(e)}")
                processed_text += f" Error processing YouTube audio: {str(e)}."

        # Optional image processing: brighten by 50%.
        if input_image is not None:
            from PIL import ImageEnhance
            enhancer = ImageEnhance.Brightness(input_image)
            output_image_display = enhancer.enhance(1.5)
            processed_text += " Image processed (brightened)."
        else:
            processed_text += " No image provided."

        # Fold the slider/checkbox settings into the summary text.
        processed_text += f" Slider: {slider_value}, Enhanced Analysis: {checkbox_value}."
        if checkbox_value:
            processed_text += " Enhanced analysis enabled."
            if youtube_url and slider_value > 50:
                processed_text += f" High threshold ({slider_value}) applied for deeper analysis."
        return processed_text, output_image_display, output_audio, extra_info
    except Exception as e:
        logging.error(f"Error in analyze_audio: {str(e)}")
        return f"Error: {str(e)}", None, None, "Processing failed."
    finally:
        # BUGFIX: this finally block runs *before* the caller ever sees the return
        # value, so unconditionally rmtree-ing temp_dir deleted the mp3 Gradio was
        # about to serve (the old time.sleep(1) could not change that ordering).
        # Only clean up when no audio file is being returned; otherwise leave the
        # scratch directory for the OS temp reaper.
        try:
            if output_audio is None and temp_dir.exists():
                shutil.rmtree(temp_dir)
                logging.debug(f"Cleaned up temporary directory: {temp_dir}")
        except Exception as e:
            logging.error(f"Error cleaning up temporary directory: {str(e)}")
# --- Input components -------------------------------------------------------
input_youtube_url = gr.Textbox(
    label="YouTube Video URL",
    placeholder="e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ",
    info="Optional: Enter a YouTube URL to download and analyze audio.",
)
input_text_component = gr.Textbox(
    label="Input Text",
    placeholder="e.g., Analyze this audio track",
    info="Type a description or query for processing.",
)
# BUGFIX: gr.Image, gr.Audio, and gr.Label do not accept an `info` kwarg
# (only form components such as Textbox/Slider/Checkbox do), so passing it
# raised TypeError at startup. The dropped hint texts are preserved as
# comments below — confirm against the pinned Gradio version.
input_image_component = gr.Image(
    type="pil",
    label="Upload Image (Optional)",
    sources=["upload", "webcam", "clipboard"],
)  # hint: "Optionally upload an image for processing."
input_slider_component = gr.Slider(
    minimum=0,
    maximum=100,
    value=50,
    step=1,
    label="Analysis Threshold",
    info="Adjusts sensitivity of audio feature analysis.",
)
input_checkbox_component = gr.Checkbox(
    label="Enable Enhanced Analysis",
    info="Toggle for deeper audio feature extraction.",
)

# --- Output components ------------------------------------------------------
output_text_component = gr.Textbox(
    label="Analysis Results",
    info="Text results including audio feature analysis.",
)
output_image_component = gr.Image(
    label="Processed Image (if any)",
)  # hint: "Processed image output (if provided)."
output_audio_component = gr.Audio(
    label="Downloaded Audio",
    type="filepath",
)  # hint: "Audio downloaded from YouTube."
output_label_component = gr.Label(
    label="Analysis Summary",
)  # hint: "Feature analysis details and processing info."
# Create the Gradio interface wiring the five inputs to the four outputs
# (order must match analyze_audio's parameters and return tuple).
iface = gr.Interface(
    fn=analyze_audio,
    inputs=[
        input_youtube_url,
        input_text_component,
        input_image_component,
        input_slider_component,
        input_checkbox_component
    ],
    outputs=[
        output_text_component,
        output_image_component,
        output_audio_component,
        output_label_component
    ],
    title="YouTube Audio Feature Analysis",
    description="Download YouTube audio, analyze features with librosa, and process text/image inputs. Customize with slider and checkbox.",
    # Example rows: [url, text, image, slider, checkbox]; clicking one invokes fn.
    examples=[
        ["https://www.youtube.com/watch?v=dQw4w9WgXcQ", "Analyze this track", None, 75, True],
        [None, "Describe a music track", None, 30, False],
        ["https://www.youtube.com/watch?v=9bZkp7q19f0", "Extract audio features", None, 60, True]
    ],
    # NOTE(review): allow_flagging is deprecated in newer Gradio releases in
    # favor of flagging_mode — confirm against the pinned Gradio version.
    allow_flagging="never",
    theme=gr.themes.Soft()
)
# Launch only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()