# NOTE: "Spaces: Runtime error" banner pasted in from the hosting page —
# commented out so the module parses. The runtime error itself most likely
# stems from the deprecated `from gradio.outputs import Video` import.
import os

import gradio as gr
import librosa
import numpy as np
import requests
from gradio.outputs import Video  # NOTE(review): removed in Gradio 3.x — likely the runtime error; migrate callers to gr.Video

from video_generator import generate_video
def extract_lyrics(api_response):
    """Group Deepgram word timings into ~10-second lyric chunks.

    Args:
        api_response: Parsed Deepgram JSON; word timings are read from
            results.channels[0].alternatives[0].words, each entry a dict
            with "word", "start" and "end" keys (times in seconds).

    Returns:
        list[tuple]: (chunk_start_sec, chunk_end_sec, text) per chunk.
        Empty list when the response contains no words.
    """
    words_timing = api_response["results"]["channels"][0]["alternatives"][0]["words"]
    CHUNK_DURATION = 10  # seconds per lyric chunk
    lyrics_with_timing = []
    # Guard: an instrumental track / failed transcription has no words;
    # the original code crashed on words_timing[-1] in that case.
    if not words_timing:
        return lyrics_with_timing
    chunk_words = []
    chunk_start = 0
    last_end = 0  # end time of the most recent word seen
    for word_info in words_timing:
        start_time = word_info["start"]
        if start_time >= chunk_start + CHUNK_DURATION:
            # Close the current chunk at the end of its LAST word (the
            # original used the end of the NEXT chunk's first word).
            lyrics_with_timing.append((chunk_start, last_end, " ".join(chunk_words)))
            chunk_words = []
            chunk_start += CHUNK_DURATION
            # Emit empty chunks for fully silent spans so chunk indices
            # stay aligned with the audio's 10-second windows (the
            # original advanced only one window per boundary crossing,
            # drifting after silences longer than one chunk).
            while start_time >= chunk_start + CHUNK_DURATION:
                lyrics_with_timing.append((chunk_start, chunk_start + CHUNK_DURATION, ""))
                chunk_start += CHUNK_DURATION
        chunk_words.append(word_info["word"])
        last_end = word_info["end"]
    # Flush the final (possibly partial) chunk.
    lyrics_with_timing.append((chunk_start, last_end, " ".join(chunk_words)))
    return lyrics_with_timing
def send_to_deepgram(audio_file_path):
    """Send an audio file to Deepgram's transcription API and return the parsed JSON.

    Args:
        audio_file_path: Path to the audio file to transcribe.

    Returns:
        dict: Deepgram's JSON response (word-level timings live under
        results.channels[0].alternatives[0].words).
    """
    endpoint = "https://api.deepgram.com/v1/listen"
    # SECURITY: the API key was previously hard-coded in source (and is
    # therefore leaked — rotate it). Read it from the environment instead.
    api_key = os.environ.get("DEEPGRAM_API_KEY", "")
    headers = {
        "Authorization": f"Token {api_key}"
    }
    with open(audio_file_path, 'rb') as audio_file:
        audio_data = audio_file.read()
    # Explicit timeout so a hung request cannot stall the whole analysis.
    response = requests.post(endpoint, headers=headers, data=audio_data, timeout=60)
    response_json = response.json()
    print("Deepgram API Response:", response_json)  # Log the response
    return response_json
def analyze_audio(audio_file_path):
    """Analyze a song in 10-second chunks and yield (description_html, video) per chunk.

    Sends the audio to Deepgram for word timings, computes audio features
    and a mood per chunk, builds a video prompt, and generates a video for
    each chunk.

    Args:
        audio_file_path: Path to the audio file to analyze.

    Yields:
        tuple: (HTML description string, generated video) for each chunk.
    """
    print("Analyzing audio...")  # Log start of analysis
    last_frame = None
    y, sr = librosa.load(audio_file_path)
    chunk_length = 10 * sr  # 10 seconds of samples
    deepgram_response = send_to_deepgram(audio_file_path)
    lyrics_chunks = extract_lyrics(deepgram_response)
    chunk_starts = range(0, len(y), chunk_length)
    # Single feature pass: the original ran analyze_chunk TWICE per chunk
    # (once to collect moods, once again for the features). Cache the full
    # result tuples instead.
    analyses = [analyze_chunk(y[s:s + chunk_length]) for s in chunk_starts]
    moods = [a[0] for a in analyses]
    for i, start in enumerate(chunk_starts):
        print(f"Analyzing chunk {i + 1}...")  # Log chunk analysis
        lyrics_summary = lyrics_chunks[i] if i < len(lyrics_chunks) else 'Instrumental or silence'
        previous_mood = moods[i - 1] if i > 0 else None
        current_mood = moods[i]
        next_mood = moods[i + 1] if i < len(moods) - 1 else None
        _, tempo, chroma_mean, spectral_contrast_mean, zero_crossing_rate_mean, mfcc_mean = analyses[i]
        prompt = generate_video_prompt(previous_mood, current_mood, next_mood, tempo, chroma_mean, spectral_contrast_mean, zero_crossing_rate_mean, mfcc_mean, lyrics_summary)
        description = f"Chunk starting at {start / sr} seconds:<br>Mood: {current_mood}<br>Video Prompt: {prompt}<br><br>"
        print(f"Generating video for chunk {i + 1}...")
        # Reuse the timings extracted above — the original re-parsed the
        # Deepgram response on every loop iteration.
        video = generate_video(lyrics_chunks, last_frame)
        # last_frame = extract_last_frame(video)  # TODO: carry frame continuity between chunks
        print(f"Description for chunk {i + 1}: {description}")
        print(f"Video for chunk {i + 1}: {video}")
        # Yield the result for this chunk so the UI can stream progress.
        yield (description, video)
def analyze_chunk(chunk):
    """Compute summary audio features for one chunk and classify its mood.

    Args:
        chunk: 1-D audio sample array for a ~10-second window.

    Returns:
        tuple: (mood, tempo, chroma_mean, spectral_contrast_mean,
        zero_crossing_rate_mean, mfcc_mean).
    """
    tempo, _ = librosa.beat.beat_track(y=chunk)
    # Collapse each feature matrix to a single scalar via the global mean.
    features = (
        np.mean(librosa.feature.chroma_stft(y=chunk)),
        np.mean(librosa.feature.spectral_contrast(y=chunk)),
        np.mean(librosa.feature.zero_crossing_rate(chunk)),
        np.mean(librosa.feature.mfcc(y=chunk)),
    )
    mood = analyze_mood(tempo, *features)
    return (mood, tempo) + features
def analyze_mood(tempo, chroma_mean, spectral_contrast_mean, zero_crossing_rate_mean, mfcc_mean):
    """Map summary audio features to a coarse mood label.

    Thresholds are heuristic; rules are evaluated top-down and the first
    match wins.

    Returns:
        str: one of 'Happy', 'Sad', 'Aggressive', 'Energetic', 'Relaxed',
        'Romantic', 'Nostalgic', 'Tense', 'Dreamy', or 'Neutral'.
    """
    # Happy Mood
    if tempo > 110 and chroma_mean > 0.4:
        return 'Happy'
    # Sad Mood
    elif tempo < 90 and chroma_mean < 0.5 and mfcc_mean < 0:
        return 'Sad'
    # Aggressive Mood — must be checked BEFORE Energetic: its conditions
    # (tempo > 140, zcr > 0.08) are a strict subset of Energetic's
    # (tempo > 130, zcr > 0.05), so in the original ordering this branch
    # was unreachable dead code.
    elif tempo > 140 and zero_crossing_rate_mean > 0.08:
        return 'Aggressive'
    # Energetic Mood
    elif tempo > 130 and zero_crossing_rate_mean > 0.05:
        return 'Energetic'
    # Relaxed Mood
    elif tempo < 100 and chroma_mean > 0.3 and spectral_contrast_mean > 15:
        return 'Relaxed'
    # Romantic Mood
    elif tempo < 100 and chroma_mean > 0.5:
        return 'Romantic'
    # Nostalgic Mood
    elif tempo < 100 and chroma_mean < 0.5 and spectral_contrast_mean < 25:
        return 'Nostalgic'
    # Tense Mood
    elif 100 <= tempo <= 130 and chroma_mean < 0.5 and spectral_contrast_mean > 20:
        return 'Tense'
    # Dreamy Mood
    elif tempo < 80 and chroma_mean > 0.4:
        return 'Dreamy'
    # Neutral Mood (catch-all)
    else:
        return 'Neutral'
def describe_tempo(tempo):
    """Return a qualitative phrase (with article) describing a BPM value."""
    # Exclusive upper-bound thresholds, scanned in ascending order.
    bands = (
        (60, "a very slow"),
        (90, "a slow"),
        (120, "a moderate"),
        (150, "a lively"),
    )
    for upper, label in bands:
        if tempo < upper:
            return label
    return "a fast"
def generate_video_prompt(previous_mood, current_mood, next_mood, tempo, chroma_mean, spectral_contrast_mean, zero_crossing_rate_mean, mfcc_mean, lyrics_summary):
    """Compose a text-to-video prompt from mood, audio features, and lyrics.

    previous_mood / next_mood may be None (first / last chunk); they only
    contribute transition sentences when present. mfcc_mean is accepted
    for signature compatibility but does not influence the wording.
    """
    # Translate raw feature values into descriptive phrases.
    rhythm = "energetic rhythm" if zero_crossing_rate_mean > 0.05 else "smooth rhythm"
    tones = "bright tones" if chroma_mean > 0.5 else "mellow tones"
    contrasts = "sharp contrasts" if spectral_contrast_mean > 20 else "soft contrasts"

    # Assemble the prompt sentence by sentence, then join once.
    sentences = [f"Essence of a {current_mood.lower()} mood. "]
    if previous_mood:
        sentences.append(f"Transition from a {previous_mood.lower()} mood. ")
    if next_mood:
        sentences.append(f"Prepare to transition to a {next_mood.lower()} mood. ")
    sentences.append(f"Showcase a scene with {rhythm}, {tones}, and {contrasts}. ")
    sentences.append(f"Visualize {describe_tempo(tempo)} tempo. ")
    sentences.append(f"Narrative based on the lyrics: '{lyrics_summary}'. ")
    sentences.append("Emphasize the themes and emotions conveyed in the song.")
    return "".join(sentences)
# Define the Gradio interface. analyze_audio is a generator, so the UI
# receives (description, video) updates chunk by chunk.
gr.Interface(
    fn=analyze_audio,
    inputs=gr.Audio(type="filepath"),
    # gradio.outputs.Video was removed in Gradio 3.x (the probable cause of
    # the "Runtime error" banner) — use the gr.Video component instead.
    outputs=[gr.HTML(), gr.Video()],
).launch()