"""Music understanding helpers that query the nvidia/music-flamingo model via gradio_client."""

import os
import tempfile
from typing import Any, Dict, Optional

from gradio_client import Client, handle_file

from .audio_info import validate_audio_path


def understand_music(
    audio_path: Optional[str] = None,
    audio_file: Optional[bytes] = None,
    filename: str = "audio",
    prompt_text: str = "Describe this track in full detail - tell me the genre, tempo, and key, then dive into the instruments, production style, and overall mood it creates.",
    youtube_url: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Analyze music using NVIDIA's Music-Flamingo Audio Language Model.

    Exactly one audio source is used per call, chosen in this priority order:
    ``audio_path``, then ``audio_file``, then ``youtube_url``. The request is
    sent to the ``nvidia/music-flamingo`` endpoint through ``gradio_client``.

    Args:
        audio_path: Path to the audio file; it is passed through
            ``validate_audio_path`` before being uploaded.
        audio_file: Raw audio bytes (alternative to audio_path). They are
            written to a temporary file that is removed before returning.
        filename: Original filename for reference (used with audio_file).
            Only its final path component is used for the temporary file, and
            ".wav" is appended unless it already ends in .wav/.mp3/.flac/.m4a.
        prompt_text: Custom prompt for analysis (default: comprehensive music description)
        youtube_url: YouTube URL as alternative audio source

    Returns:
        Dictionary with analysis results:
        {
            "analysis": Model output, or None on error,
            "audio_source": "path", "bytes", "youtube" or "unknown",
            "filename": Basename of the path, the given filename, or the URL,
            "prompt": "Used prompt text",
            "status": "success" or "error",
            "error": "Error message" (present only when status is "error")
        }

    Note:
        This function does not raise for ordinary failures. Missing inputs,
        validation errors and API/network errors are all caught and reported
        through ``status == "error"`` and the ``error`` message.

    Examples:
        # Basic analysis with local file
        result = understand_music(audio_path="song.mp3")
        print(result["analysis"])

        # Custom prompt for finding cut points
        result = understand_music(
            audio_path="song.mp3",
            prompt_text="Identify the best cutting points for editing - suggest specific time stamps where verses, choruses, and bridges begin and end."
        )

        # Analysis with YouTube URL
        result = understand_music(
            youtube_url="https://youtube.com/watch?v=example",
            prompt_text="Analyze the structure and suggest optimal edit points."
        )
    """
    # Scratch directory for the audio_file branch; tracked here so the
    # ``finally`` clause can remove it no matter where a failure happens.
    temp_dir = None

    try:
        # Validate input parameters
        if not any([audio_path, audio_file, youtube_url]):
            raise ValueError(
                "Either audio_path, audio_file, or youtube_url must be provided"
            )

        if audio_path:
            # Validate and use local audio file
            validated_path = validate_audio_path(audio_path)
            audio_source = handle_file(validated_path)
            source_type = "path"
            source_filename = os.path.basename(validated_path)

        elif audio_file:
            if not filename:
                raise ValueError("Filename must be provided when using audio_file")

            # Keep only the last path component so a caller-supplied name such
            # as "/etc/x.wav" or "../x.wav" cannot escape the scratch directory.
            safe_name = os.path.basename(filename) or "audio"
            if not safe_name.lower().endswith((".wav", ".mp3", ".flac", ".m4a")):
                safe_name = f"{safe_name}.wav"

            temp_dir = tempfile.TemporaryDirectory()
            temp_file_path = os.path.join(temp_dir.name, safe_name)

            with open(temp_file_path, "wb") as f:
                f.write(audio_file)

            audio_source = handle_file(temp_file_path)
            source_type = "bytes"
            source_filename = filename

        else:
            # Use YouTube URL directly.
            # NOTE(review): the URL is sent as both audio_path and youtube_url,
            # as before - confirm the /infer endpoint expects this.
            audio_source = youtube_url
            source_type = "youtube"
            source_filename = youtube_url

        # Initialize client and make prediction
        client = Client("nvidia/music-flamingo")

        result = client.predict(
            audio_path=audio_source,
            youtube_url=youtube_url if youtube_url else "",
            prompt_text=prompt_text,
            api_name="/infer",
        )

        return {
            "analysis": result,
            "audio_source": source_type,
            "filename": source_filename,
            "prompt": prompt_text,
            "status": "success",
        }

    except Exception as e:
        # Label the source with the same priority and vocabulary as the
        # success path so callers can rely on "audio_source" either way.
        if audio_path:
            failed_type = "path"
            failed_name = os.path.basename(str(audio_path))
        elif audio_file:
            failed_type = "bytes"
            failed_name = filename
        elif youtube_url:
            failed_type = "youtube"
            failed_name = youtube_url
        else:
            failed_type = "unknown"
            failed_name = "unknown"

        return {
            "analysis": None,
            "audio_source": failed_type,
            "filename": failed_name,
            "prompt": prompt_text,
            "status": "error",
            "error": str(e),
        }

    finally:
        if temp_dir is not None:
            try:
                temp_dir.cleanup()
            except OSError:
                # Best-effort cleanup: a leftover scratch directory must not
                # replace the analysis result (or the real error) being returned.
                pass


def analyze_music_structure(
    audio_path: Optional[str] = None,
    audio_file: Optional[bytes] = None,
    filename: str = "audio",
    youtube_url: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Analyze music structure and identify sections (verse, chorus, bridge, etc.).

    This is a thin wrapper around ``understand_music`` that supplies a fixed
    prompt asking for timestamped sections, making it suited to working out
    where to make cuts and edits.

    Args:
        audio_path: Path to local audio file or URL
        audio_file: Raw audio bytes
        filename: Original filename for reference
        youtube_url: YouTube URL as alternative audio source

    Returns:
        The result dictionary produced by ``understand_music`` for the
        structure-analysis prompt.
    """
    # The section list previously ended in the duplicated "outro/outro".
    structure_prompt = (
        "Analyze the structure of this music track. Identify and timestamp the different sections: "
        "intro, verses, choruses, pre-chorus, bridge, instrumental breaks, solo sections, and outro. "
        "Provide specific time stamps (in MM:SS format) for where each section begins and ends. "
        "Also note any transitions, buildups, or breakdowns that would be important for editing."
    )

    return understand_music(
        audio_path=audio_path,
        audio_file=audio_file,
        filename=filename,
        prompt_text=structure_prompt,
        youtube_url=youtube_url,
    )


def suggest_cutting_points(
    audio_path: Optional[str] = None,
    audio_file: Optional[bytes] = None,
    filename: str = "audio",
    youtube_url: Optional[str] = None,
    purpose: str = "general",
) -> Dict[str, Any]:
    """
    Ask the model for good places to cut a track, tailored to a use case.

    Args:
        audio_path: Path to local audio file or URL
        audio_file: Raw audio bytes
        filename: Original filename for reference
        youtube_url: YouTube URL as alternative audio source
        purpose: One of 'general', 'dj_mix', 'social_media', 'ringtone'.
            Any other value falls back to the 'general' prompt.

    Returns:
        The result dictionary produced by ``understand_music`` for the
        selected prompt.
    """
    # One prompt per supported purpose, written out sentence by sentence.
    general_prompt = (
        "Suggest the best cutting points for this track. "
        "Identify natural edit points where the music flows well for cuts. "
        "Provide timestamps in MM:SS format and explain why each point is good for editing "
        "(e.g., clean transitions, beat drops, phrase endings)."
    )
    dj_mix_prompt = (
        "Analyze this track for DJ mixing purposes. "
        "Identify the best intro and outro sections for beatmatching, "
        "suggest cue points for mixing, and provide timestamps for clean transitions. "
        "Focus on drum patterns, BPM consistency, and mixable sections."
    )
    social_media_prompt = (
        "Suggest cutting points for social media content (15-60 seconds). "
        "Identify the most engaging parts of the track, catchy hooks, or impactful moments. "
        "Provide timestamps for creating short, attention-grabbing clips."
    )
    ringtone_prompt = (
        "Identify the best 15-30 second sections for ringtones. "
        "Look for memorable melodies, catchy choruses, or distinctive instrumental parts. "
        "Provide timestamps and explain why each section would work well as a ringtone."
    )

    prompts_by_purpose = {
        "general": general_prompt,
        "dj_mix": dj_mix_prompt,
        "social_media": social_media_prompt,
        "ringtone": ringtone_prompt,
    }

    # Unrecognised purposes use the general-purpose prompt.
    try:
        chosen_prompt = prompts_by_purpose[purpose]
    except KeyError:
        chosen_prompt = general_prompt

    return understand_music(
        audio_path=audio_path,
        audio_file=audio_file,
        filename=filename,
        prompt_text=chosen_prompt,
        youtube_url=youtube_url,
    )


def analyze_genre_and_style(
    audio_path: Optional[str] = None,
    audio_file: Optional[bytes] = None,
    filename: str = "audio",
    youtube_url: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Request a detailed genre and production-style breakdown of a track.

    Delegates to ``understand_music`` with a fixed prompt covering genre,
    production techniques, instrumentation and stylistic influences.

    Args:
        audio_path: Path to local audio file or URL
        audio_file: Raw audio bytes
        filename: Original filename for reference
        youtube_url: YouTube URL as alternative audio source

    Returns:
        The result dictionary produced by ``understand_music`` for the
        genre/style prompt.
    """
    # Prompt assembled from its individual sentences, separated by one space.
    prompt_sentences = (
        "Provide a detailed analysis of this track's genre and production style.",
        "Identify the primary genre and any subgenres or fusion elements.",
        "Describe the production techniques, mixing style, sound design choices, and arrangement.",
        "Analyze the instrumentation, including both traditional and electronic elements.",
        "Discuss the era or period the music seems to draw inspiration from, "
        "and compare it to similar artists or tracks if applicable.",
    )
    style_prompt = " ".join(prompt_sentences)

    request = {
        "audio_path": audio_path,
        "audio_file": audio_file,
        "filename": filename,
        "prompt_text": style_prompt,
        "youtube_url": youtube_url,
    }
    return understand_music(**request)


# Command-line entry point: one subcommand per analysis helper.
if __name__ == "__main__":
    import argparse
    import sys

    parser = argparse.ArgumentParser(
        description="Music understanding and analysis tools"
    )
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # General understanding
    understand_parser = subparsers.add_parser(
        "understand", help="General music analysis"
    )
    understand_parser.add_argument("--audio", help="Path to audio file")
    understand_parser.add_argument("--prompt", help="Custom prompt text")
    understand_parser.add_argument("--youtube", help="YouTube URL")

    # Structure analysis
    structure_parser = subparsers.add_parser("structure", help="Analyze song structure")
    structure_parser.add_argument("--audio", help="Path to audio file")
    structure_parser.add_argument("--youtube", help="YouTube URL")

    # Cutting points
    cutting_parser = subparsers.add_parser("cutting", help="Suggest cutting points")
    cutting_parser.add_argument("--audio", help="Path to audio file")
    cutting_parser.add_argument(
        "--purpose",
        choices=["general", "dj_mix", "social_media", "ringtone"],
        default="general",
        help="Purpose of cutting",
    )
    cutting_parser.add_argument("--youtube", help="YouTube URL")

    # Genre analysis
    genre_parser = subparsers.add_parser("genre", help="Analyze genre and style")
    genre_parser.add_argument("--audio", help="Path to audio file")
    genre_parser.add_argument("--youtube", help="YouTube URL")

    args = parser.parse_args()

    try:
        if args.command == "understand":
            # Override the prompt only when one was given, so the default
            # defined on understand_music is not duplicated here.
            prompt_override = {"prompt_text": args.prompt} if args.prompt else {}
            result = understand_music(
                audio_path=args.audio,
                youtube_url=args.youtube,
                **prompt_override,
            )

        elif args.command == "structure":
            # Previously registered above but never dispatched, so this
            # subcommand only printed the help text and exited with 1.
            result = analyze_music_structure(
                audio_path=args.audio, youtube_url=args.youtube
            )

        elif args.command == "cutting":
            result = suggest_cutting_points(
                audio_path=args.audio, youtube_url=args.youtube, purpose=args.purpose
            )

        elif args.command == "genre":
            result = analyze_genre_and_style(
                audio_path=args.audio, youtube_url=args.youtube
            )

        else:
            # No subcommand given.
            parser.print_help()
            sys.exit(1)

        # Output results
        if result["status"] == "success":
            print(f"Analysis for: {result['filename']}")
            print(f"Source: {result['audio_source']}")
            print(f"Prompt: {result['prompt']}")
            print("\n" + "=" * 50)
            print(result["analysis"])
        else:
            print(f"Error: {result['error']}")
            sys.exit(1)

    except Exception as e:
        # sys.exit raises SystemExit, which is not an Exception subclass, so
        # the deliberate exits above are not intercepted here.
        print(f"Error: {e}")
        sys.exit(1)