# Gradio app: transcribe uploaded audio or a YouTube URL into Suno-compatible
# lyrics/structure plus a [Suno] metadata block, using the Google Gemini API.
import mimetypes
import os

import gradio as gr
from google import genai
from google.genai import types
# System prompt sent to Gemini together with the audio / YouTube content.
# NOTE(review): the scraped source had mojibake ("β’" for "•", "β"-runs for
# "─" section rules); the characters below are the reconstructed originals.
DEFAULT_PROMPT = """
You are a phoneme-precise music transcriptionist and sonic prompt architect. Your task is to analyze the provided audio and return a Suno-compatible output consisting of:
• Structured section headers for musical form
• Phonemically accurate lyrics (if vocals are present)
• A detailed [Suno] metadata block capturing the sonic, emotional, and stylistic identity of the track
────────────────────────────
1. VOCALS (If Present)
────────────────────────────
- Transcribe lyrics as performed, using stylized phonemic spelling (e.g., vowel extensions, pitch-inflected phrasing)
- Use parentheses ( ) only for short adlibs, interjections, or vocal textures
- Never include meta-descriptions, labels, or invented content
- Use structural section tags in all caps and brackets: [INTRO], [VERSE], [HOOK], [OUTRO], etc.
- If gendered vocals are present, use tags like [MALE VERSE], [FEMALE HOOK]
────────────────────────────
2. INSTRUMENTAL-ONLY TRACKS
────────────────────────────
- Still mark sections using musical structure tags
- Leave sections empty or use stylized cues only if audible (e.g., (riser), (cowbell hit))
- Do not fabricate lyrics or write in narrative form
────────────────────────────
3. FINAL OUTPUT FORMAT
────────────────────────────
After the lyrics or structure, output a [Suno] metadata block containing:
[Suno]
Style: (genre and subgenres)
Mood: (emotional tone)
Tempo: (BPM estimate)
Key: (musical key if detectable)
Vocals: (gender, style, effects)
Delivery: (singing style, rap style, etc.)
Instrumentation: (instruments and sounds used)
Mix: (production style, reverb, compression, etc.)
Structure: (song structure pattern)
Use evocative, technically grounded language. Focus on textural qualities, performance style, emotional tone, and sonic distinctiveness.
────────────────────────────
4. OUTPUT CONSTRAINTS
────────────────────────────
- Output **only** the lyrics/structure + the [Suno] block
- No explanation, notes, or markdown
- No placeholder tags, commentary, or repetition
- All output must be parsable by a downstream music generation agent
"""
def analyze_media(media_input, media_type, api_key, custom_prompt):
    """Analyze an audio file or a YouTube URL with Gemini and return the text result.

    Args:
        media_input: Filesystem path to the audio file (media_type == "Audio")
            or a YouTube URL string (media_type == "YouTube").
        media_type: Either "Audio" or "YouTube".
        api_key: Google AI API key.
        custom_prompt: Prompt to send alongside the media.

    Returns:
        The model's text response, or a human-readable "❌ ..." error string
        (this function never raises — errors are reported in the UI textbox).
    """
    # Guard clauses: validate inputs before touching the network.
    if not api_key or not api_key.strip():
        return "❌ Please enter your Google AI API key"
    if media_type == "Audio" and media_input is None:
        return "❌ Please upload an audio file"
    elif media_type == "YouTube" and (not media_input or not media_input.strip()):
        return "❌ Please enter a YouTube URL"
    try:
        client = genai.Client(api_key=api_key.strip())
        MODEL_ID = "gemini-2.0-flash"
        if media_type == "Audio":
            file_size = os.path.getsize(media_input)
            if file_size > 20 * 1024 * 1024:  # 20 MB inline-payload limit per docs
                # Large files go through the Files API.
                uploaded_file = client.files.upload(file=media_input)
                response = client.models.generate_content(
                    model=MODEL_ID,
                    contents=[custom_prompt, uploaded_file],
                )
            else:
                with open(media_input, 'rb') as f:
                    audio_data = f.read()
                # Fix: the original always claimed 'audio/mp3', but the Gradio
                # uploader accepts any audio format — infer the MIME type from
                # the file extension and fall back to mp3 when unknown.
                mime_type, _ = mimetypes.guess_type(media_input)
                if not mime_type or not mime_type.startswith("audio/"):
                    mime_type = "audio/mp3"
                response = client.models.generate_content(
                    model=MODEL_ID,
                    contents=[
                        custom_prompt,
                        types.Part.from_bytes(data=audio_data, mime_type=mime_type),
                    ],
                )
        elif media_type == "YouTube":
            # YouTube URLs are passed by reference via FileData.
            youtube_url = media_input.strip()
            response = client.models.generate_content(
                model=MODEL_ID,
                contents=types.Content(
                    parts=[
                        types.Part(text=custom_prompt),
                        types.Part(file_data=types.FileData(file_uri=youtube_url)),
                    ]
                ),
            )
        return response.text
    except Exception as e:
        # Broad catch is deliberate: any SDK/IO failure is surfaced to the UI.
        return f"❌ Error: {str(e)}"
# --- Gradio UI: audio upload or YouTube URL -> Suno-style analysis ---
# NOTE(review): heading emoji/arrow were mojibaked in the scrape ("π§", "β");
# restored to "🎧" and "→" here.
with gr.Blocks() as demo:
    gr.Markdown("# 🎧 Audio & YouTube → Suno Analyzer")
    with gr.Row():
        media_type = gr.Radio(["Audio", "YouTube"], value="Audio", label="Input Type")
    with gr.Row():
        # Only one of these two inputs is visible at a time (see update_inputs).
        audio_input = gr.Audio(sources=["upload"], type="filepath", label="Audio File", visible=True)
        youtube_input = gr.Textbox(label="YouTube URL", placeholder="https://www.youtube.com/watch?v=...", visible=False)
    api_key = gr.Textbox(label="Google AI API Key", type="password")
    custom_prompt = gr.Textbox(value=DEFAULT_PROMPT, label="Prompt", lines=10)
    analyze_btn = gr.Button("Analyze")
    output = gr.Textbox(label="Results", lines=15)

    def update_inputs(choice):
        """Toggle which input widget is visible based on the selected type."""
        if choice == "Audio":
            return gr.update(visible=True), gr.update(visible=False)
        else:
            return gr.update(visible=False), gr.update(visible=True)

    media_type.change(
        fn=update_inputs,
        inputs=[media_type],
        outputs=[audio_input, youtube_input],
    )

    def process_media(media_type, audio_file, youtube_url, api_key, prompt):
        """Route the click to analyze_media with whichever input is active."""
        if media_type == "Audio":
            return analyze_media(audio_file, "Audio", api_key, prompt)
        else:
            return analyze_media(youtube_url, "YouTube", api_key, prompt)

    analyze_btn.click(
        fn=process_media,
        inputs=[media_type, audio_input, youtube_input, api_key, custom_prompt],
        outputs=output,
    )

demo.launch()