"""Gradio app: analyze an audio file or YouTube video with Gemini and emit a
Suno-compatible transcription + metadata block.

Requires a Google AI API key supplied through the UI.
"""

import mimetypes
import os

import gradio as gr
from google import genai
from google.genai import types

# Files larger than this are uploaded via the Files API instead of inline bytes
# (20 MB threshold recommended by the Gemini docs for inline audio).
_INLINE_AUDIO_LIMIT = 20 * 1024 * 1024

# Model used for all requests.
_MODEL_ID = "gemini-2.0-flash"

DEFAULT_PROMPT = """
You are a phoneme-precise music transcriptionist and sonic prompt architect.
Your task is to analyze the provided audio and return a Suno-compatible output consisting of:
• Structured section headers for musical form
• Phonemically accurate lyrics (if vocals are present)
• A detailed [Suno] metadata block capturing the sonic, emotional, and stylistic identity of the track
────────────────────────────
1. VOCALS (If Present)
────────────────────────────
- Transcribe lyrics as performed, using stylized phonemic spelling (e.g., vowel extensions, pitch-inflected phrasing)
- Use parentheses ( ) only for short adlibs, interjections, or vocal textures
- Never include meta-descriptions, labels, or invented content
- Use structural section tags in all caps and brackets: [INTRO], [VERSE], [HOOK], [OUTRO], etc.
- If gendered vocals are present, use tags like [MALE VERSE], [FEMALE HOOK]
────────────────────────────
2. INSTRUMENTAL-ONLY TRACKS
────────────────────────────
- Still mark sections using musical structure tags
- Leave sections empty or use stylized cues only if audible (e.g., (riser), (cowbell hit))
- Do not fabricate lyrics or write in narrative form
────────────────────────────
3. FINAL OUTPUT FORMAT
────────────────────────────
After the lyrics or structure, output a [Suno] metadata block containing:
[Suno]
Style: (genre and subgenres)
Mood: (emotional tone)
Tempo: (BPM estimate)
Key: (musical key if detectable)
Vocals: (gender, style, effects)
Delivery: (singing style, rap style, etc.)
Instrumentation: (instruments and sounds used)
Mix: (production style, reverb, compression, etc.)
Structure: (song structure pattern)
Use evocative, technically grounded language. Focus on textural qualities, performance style, emotional tone, and sonic distinctiveness.
────────────────────────────
4. OUTPUT CONSTRAINTS
────────────────────────────
- Output **only** the lyrics/structure + the [Suno] block
- No explanation, notes, or markdown
- No placeholder tags, commentary, or repetition
- All output must be parsable by a downstream music generation agent
"""


def _analyze_audio(client, file_path, prompt):
    """Send one local audio file to Gemini and return the raw response.

    Large files go through the Files API; small files are sent inline with a
    MIME type guessed from the extension (the original hardcoded 'audio/mp3',
    which mislabels WAV/FLAC/OGG uploads from gr.Audio).
    """
    if os.path.getsize(file_path) > _INLINE_AUDIO_LIMIT:
        uploaded = client.files.upload(file=file_path)
        return client.models.generate_content(
            model=_MODEL_ID,
            contents=[prompt, uploaded],
        )

    with open(file_path, 'rb') as f:
        audio_data = f.read()
    # Fall back to the original default when the extension is unrecognized.
    mime_type = mimetypes.guess_type(file_path)[0] or 'audio/mp3'
    return client.models.generate_content(
        model=_MODEL_ID,
        contents=[
            prompt,
            types.Part.from_bytes(data=audio_data, mime_type=mime_type),
        ],
    )


def _analyze_youtube(client, youtube_url, prompt):
    """Ask Gemini to analyze a YouTube video by URL and return the raw response."""
    return client.models.generate_content(
        model=_MODEL_ID,
        contents=types.Content(
            parts=[
                types.Part(text=prompt),
                types.Part(file_data=types.FileData(file_uri=youtube_url)),
            ]
        ),
    )


def analyze_media(media_input, media_type, api_key, custom_prompt):
    """Validate the inputs and run the appropriate Gemini analysis.

    Args:
        media_input: file path (Audio) or URL string (YouTube).
        media_type: "Audio" or "YouTube".
        api_key: Google AI API key.
        custom_prompt: prompt text sent alongside the media.

    Returns:
        The model's text output, or a human-readable "❌ ..." error string.
    """
    if not api_key or not api_key.strip():
        return "❌ Please enter your Google AI API key"
    if media_type == "Audio" and media_input is None:
        return "❌ Please upload an audio file"
    elif media_type == "YouTube" and (not media_input or not media_input.strip()):
        return "❌ Please enter a YouTube URL"

    try:
        client = genai.Client(api_key=api_key.strip())
        if media_type == "Audio":
            response = _analyze_audio(client, media_input, custom_prompt)
        elif media_type == "YouTube":
            response = _analyze_youtube(client, media_input.strip(), custom_prompt)
        else:
            # Original code left `response` unbound here (NameError swallowed
            # by the broad except); raise something meaningful instead.
            raise ValueError(f"Unsupported media type: {media_type!r}")
        # response.text can be None when the model returns no text parts.
        return response.text or "❌ No text returned by the model"
    except Exception as e:
        return f"❌ Error: {str(e)}"


# Simple interface with both audio and YouTube support
with gr.Blocks() as demo:
    gr.Markdown("# 🎧 Audio & YouTube → Suno Analyzer")

    with gr.Row():
        media_type = gr.Radio(["Audio", "YouTube"], value="Audio", label="Input Type")
    with gr.Row():
        audio_input = gr.Audio(
            sources=["upload"], type="filepath", label="Audio File", visible=True
        )
        youtube_input = gr.Textbox(
            label="YouTube URL",
            placeholder="https://www.youtube.com/watch?v=...",
            visible=False,
        )

    api_key = gr.Textbox(label="Google AI API Key", type="password")
    custom_prompt = gr.Textbox(value=DEFAULT_PROMPT, label="Prompt", lines=10)
    analyze_btn = gr.Button("Analyze")
    output = gr.Textbox(label="Results", lines=15)

    def update_inputs(choice):
        """Show the input widget matching the selected media type, hide the other."""
        if choice == "Audio":
            return gr.update(visible=True), gr.update(visible=False)
        return gr.update(visible=False), gr.update(visible=True)

    media_type.change(
        fn=update_inputs,
        inputs=[media_type],
        outputs=[audio_input, youtube_input],
    )

    def process_media(media_type, audio_file, youtube_url, api_key, prompt):
        """Route the clicked analysis to the correct input field."""
        if media_type == "Audio":
            return analyze_media(audio_file, "Audio", api_key, prompt)
        return analyze_media(youtube_url, "YouTube", api_key, prompt)

    analyze_btn.click(
        fn=process_media,
        inputs=[media_type, audio_input, youtube_input, api_key, custom_prompt],
        outputs=output,
    )

demo.launch()