Spaces:

frascuchon
/

music-mcp

Running on CPU Upgrade

App Files Files Community

music-mcp / tools /music_understanding.py

frascuchon HF Staff

audio_path documented

14e5437 19 days ago

raw

history blame contribute delete

13.2 kB

	import os
	import tempfile
	from typing import Any, Dict, Optional

	from gradio_client import Client, handle_file

	from .audio_info import validate_audio_path


	def understand_music(
	    audio_path: Optional[str] = None,
	    audio_file: Optional[bytes] = None,
	    filename: str = "audio",
	    prompt_text: str = "Describe this track in full detail - tell me the genre, tempo, and key, then dive into the instruments, production style, and overall mood it creates.",
	    youtube_url: Optional[str] = None,
	) -> Dict[str, Any]:
	    """
	    Analyze music by calling the "nvidia/music-flamingo" Gradio app.

	    Exactly one audio source is used, chosen in this order of precedence:
	    ``audio_path``, then ``audio_file``, then ``youtube_url``. The source is
	    sent together with ``prompt_text`` to the app's ``/infer`` endpoint.

	    Args:
	        audio_path: Path to local audio file or URL (supports WAV, MP3, FLAC, M4A).
	            It is checked with ``validate_audio_path`` before being uploaded.
	        audio_file: Raw audio bytes (alternative to audio_path). They are written
	            to a temporary file that is removed before the function returns.
	        filename: Original filename for reference (used with audio_file). Only its
	            base name is used for the temporary file; ".wav" is appended when it
	            does not end in .wav, .mp3, .flac or .m4a.
	        prompt_text: Custom prompt for analysis (default: comprehensive music description)
	        youtube_url: YouTube URL as alternative audio source

	    Returns:
	        Dictionary with analysis results:
	        {
	            "analysis": analysis returned by the model, or None on error,
	            "audio_source": "path", "bytes", "youtube" or "unknown",
	            "filename": file name / URL of the source, or "unknown",
	            "prompt": "Used prompt text",
	            "status": "success" or "error",
	            "error": "Error message" (only present when status is "error")
	        }

	    Note:
	        This function does not raise for ordinary failures. Missing input,
	        validation errors and API/network errors are all caught and reported
	        through ``status == "error"`` and the ``error`` message.

	    Examples:
	        # Basic analysis with local file
	        result = understand_music(audio_path="song.mp3")
	        print(result["analysis"])

	        # Custom prompt for finding cut points
	        result = understand_music(
	            audio_path="song.mp3",
	            prompt_text="Identify the best cutting points for editing - suggest specific time stamps where verses, choruses, and bridges begin and end."
	        )

	        # Analysis with YouTube URL
	        result = understand_music(
	            youtube_url="https://youtube.com/watch?v=example",
	            prompt_text="Analyze the structure and suggest optimal edit points."
	        )
	    """
	    audio_extensions = (".wav", ".mp3", ".flac", ".m4a")

	    # Tracked outside the try block so the error payload reports the same
	    # labels as the success payload.
	    source_type = "unknown"
	    source_filename = "unknown"
	    temp_dir = None

	    try:
	        if audio_path:
	            source_type = "path"
	            source_filename = os.path.basename(audio_path)
	            validated_path = validate_audio_path(audio_path)
	            audio_source = handle_file(validated_path)
	            source_filename = os.path.basename(validated_path)

	        elif audio_file:
	            source_type = "bytes"
	            source_filename = filename
	            if not filename:
	                raise ValueError("Filename must be provided when using audio_file")

	            # Keep only the base name so a caller-supplied name such as
	            # "../x.wav" cannot place the file outside the temp directory.
	            safe_name = os.path.basename(filename) or "audio"
	            if not safe_name.lower().endswith(audio_extensions):
	                safe_name = f"{safe_name}.wav"

	            temp_dir = tempfile.TemporaryDirectory()
	            temp_file_path = os.path.join(temp_dir.name, safe_name)
	            with open(temp_file_path, "wb") as f:
	                f.write(audio_file)

	            audio_source = handle_file(temp_file_path)

	        elif youtube_url:
	            source_type = "youtube"
	            source_filename = youtube_url
	            # NOTE(review): the URL is also sent as `audio_path`, as the
	            # original code did - confirm the endpoint expects this.
	            audio_source = youtube_url

	        else:
	            raise ValueError(
	                "Either audio_path, audio_file, or youtube_url must be provided"
	            )

	        # Initialize client and make prediction
	        client = Client("nvidia/music-flamingo")
	        result = client.predict(
	            audio_path=audio_source,
	            youtube_url=youtube_url if youtube_url else "",
	            prompt_text=prompt_text,
	            api_name="/infer",
	        )

	        return {
	            "analysis": result,
	            "audio_source": source_type,
	            "filename": source_filename,
	            "prompt": prompt_text,
	            "status": "success",
	        }

	    except Exception as e:
	        # Boundary handler: callers receive a structured error, not an exception.
	        return {
	            "analysis": None,
	            "audio_source": source_type,
	            "filename": source_filename,
	            "prompt": prompt_text,
	            "status": "error",
	            "error": str(e),
	        }

	    finally:
	        # Remove the temporary directory and its contents, even when writing
	        # the file or the API call failed.
	        if temp_dir is not None:
	            try:
	                temp_dir.cleanup()
	            except OSError:
	                # Best-effort cleanup must not replace the result being returned.
	                pass


	def analyze_music_structure(
	    audio_path: Optional[str] = None,
	    audio_file: Optional[bytes] = None,
	    filename: str = "audio",
	    youtube_url: Optional[str] = None,
	) -> Dict[str, Any]:
	    """
	    Analyze music structure and identify sections (verse, chorus, bridge, etc.).

	    This is a thin wrapper around ``understand_music`` that supplies a fixed
	    prompt asking for timestamped sections and transitions, which makes the
	    answer useful for deciding where to cut and edit.

	    Args:
	        audio_path: Path to local audio file or URL
	        audio_file: Raw audio bytes
	        filename: Original filename for reference
	        youtube_url: YouTube URL as alternative audio source

	    Returns:
	        The dictionary returned by ``understand_music`` for the structure prompt.
	    """
	    # The section list previously ended with the duplicated "outro/outro".
	    structure_prompt = (
	        "Analyze the structure of this music track. Identify and timestamp the different sections: "
	        "intro, verses, choruses, pre-chorus, bridge, instrumental breaks, solo sections, and outro. "
	        "Provide specific time stamps (in MM:SS format) for where each section begins and ends. "
	        "Also note any transitions, buildups, or breakdowns that would be important for editing."
	    )

	    return understand_music(
	        audio_path=audio_path,
	        audio_file=audio_file,
	        filename=filename,
	        prompt_text=structure_prompt,
	        youtube_url=youtube_url,
	    )


	def suggest_cutting_points(
	    audio_path: Optional[str] = None,
	    audio_file: Optional[bytes] = None,
	    filename: str = "audio",
	    youtube_url: Optional[str] = None,
	    purpose: str = "general",
	) -> Dict[str, Any]:
	    """
	    Ask the model where a track can best be cut for a given editing purpose.

	    A purpose-specific prompt is selected and forwarded to ``understand_music``
	    together with the audio source arguments.

	    Args:
	        audio_path: Path to local audio file or URL
	        audio_file: Raw audio bytes
	        filename: Original filename for reference
	        youtube_url: YouTube URL as alternative audio source
	        purpose: One of 'general', 'dj_mix', 'social_media', 'ringtone'.
	            Any other value uses the 'general' prompt.

	    Returns:
	        The dictionary returned by ``understand_music`` for the chosen prompt.
	    """
	    prompts_by_purpose = {
	        "general": (
	            "Suggest the best cutting points for this track. "
	            "Identify natural edit points where the music flows well for cuts. "
	            "Provide timestamps in MM:SS format and explain why each point is good "
	            "for editing (e.g., clean transitions, beat drops, phrase endings)."
	        ),
	        "dj_mix": (
	            "Analyze this track for DJ mixing purposes. "
	            "Identify the best intro and outro sections for beatmatching, "
	            "suggest cue points for mixing, and provide timestamps for clean transitions. "
	            "Focus on drum patterns, BPM consistency, and mixable sections."
	        ),
	        "social_media": (
	            "Suggest cutting points for social media content (15-60 seconds). "
	            "Identify the most engaging parts of the track, catchy hooks, or impactful moments. "
	            "Provide timestamps for creating short, attention-grabbing clips."
	        ),
	        "ringtone": (
	            "Identify the best 15-30 second sections for ringtones. "
	            "Look for memorable melodies, catchy choruses, or distinctive instrumental parts. "
	            "Provide timestamps and explain why each section would work well as a ringtone."
	        ),
	    }

	    # Unrecognised purposes use the general-purpose prompt.
	    if purpose in prompts_by_purpose:
	        chosen_prompt = prompts_by_purpose[purpose]
	    else:
	        chosen_prompt = prompts_by_purpose["general"]

	    return understand_music(
	        audio_path=audio_path,
	        audio_file=audio_file,
	        filename=filename,
	        prompt_text=chosen_prompt,
	        youtube_url=youtube_url,
	    )


	def analyze_genre_and_style(
	    audio_path: Optional[str] = None,
	    audio_file: Optional[bytes] = None,
	    filename: str = "audio",
	    youtube_url: Optional[str] = None,
	) -> Dict[str, Any]:
	    """
	    Ask the model for a detailed genre and production style breakdown.

	    A fixed prompt covering genre, production, instrumentation and influences
	    is forwarded to ``understand_music`` together with the audio source arguments.

	    Args:
	        audio_path: Path to local audio file or URL
	        audio_file: Raw audio bytes
	        filename: Original filename for reference
	        youtube_url: YouTube URL as alternative audio source

	    Returns:
	        The dictionary returned by ``understand_music`` for the genre prompt.
	    """
	    # One entry per sentence; joined with single spaces to form the prompt.
	    prompt_sentences = [
	        "Provide a detailed analysis of this track's genre and production style.",
	        "Identify the primary genre and any subgenres or fusion elements.",
	        "Describe the production techniques, mixing style, sound design choices, and arrangement.",
	        "Analyze the instrumentation, including both traditional and electronic elements.",
	        "Discuss the era or period the music seems to draw inspiration from, and compare it to similar artists or tracks if applicable.",
	    ]
	    style_prompt = " ".join(prompt_sentences)

	    source_kwargs = {
	        "audio_path": audio_path,
	        "audio_file": audio_file,
	        "filename": filename,
	        "youtube_url": youtube_url,
	    }
	    return understand_music(prompt_text=style_prompt, **source_kwargs)


	if __name__ == "__main__":
	    # Command-line entry point: one subcommand per analysis function.
	    import argparse
	    import sys

	    parser = argparse.ArgumentParser(
	        description="Music understanding and analysis tools"
	    )
	    subparsers = parser.add_subparsers(dest="command", help="Available commands")

	    # General understanding
	    understand_parser = subparsers.add_parser(
	        "understand", help="General music analysis"
	    )
	    understand_parser.add_argument("--audio", help="Path to audio file")
	    understand_parser.add_argument("--prompt", help="Custom prompt text")
	    understand_parser.add_argument("--youtube", help="YouTube URL")

	    # Structure analysis
	    structure_parser = subparsers.add_parser("structure", help="Analyze song structure")
	    structure_parser.add_argument("--audio", help="Path to audio file")
	    structure_parser.add_argument("--youtube", help="YouTube URL")

	    # Cutting points
	    cutting_parser = subparsers.add_parser("cutting", help="Suggest cutting points")
	    cutting_parser.add_argument("--audio", help="Path to audio file")
	    cutting_parser.add_argument(
	        "--purpose",
	        choices=["general", "dj_mix", "social_media", "ringtone"],
	        default="general",
	        help="Purpose of cutting",
	    )
	    cutting_parser.add_argument("--youtube", help="YouTube URL")

	    # Genre analysis
	    genre_parser = subparsers.add_parser("genre", help="Analyze genre and style")
	    genre_parser.add_argument("--audio", help="Path to audio file")
	    genre_parser.add_argument("--youtube", help="YouTube URL")

	    args = parser.parse_args()

	    try:
	        if args.command == "understand":
	            # Pass prompt_text only when given, so the function's own default
	            # prompt applies instead of a duplicated copy here.
	            prompt_kwargs = {"prompt_text": args.prompt} if args.prompt else {}
	            result = understand_music(
	                audio_path=args.audio,
	                youtube_url=args.youtube,
	                **prompt_kwargs,
	            )

	        elif args.command == "structure":
	            # This subcommand was registered but previously never dispatched.
	            result = analyze_music_structure(
	                audio_path=args.audio, youtube_url=args.youtube
	            )

	        elif args.command == "cutting":
	            result = suggest_cutting_points(
	                audio_path=args.audio, youtube_url=args.youtube, purpose=args.purpose
	            )

	        elif args.command == "genre":
	            result = analyze_genre_and_style(
	                audio_path=args.audio, youtube_url=args.youtube
	            )

	        else:
	            # No subcommand given.
	            parser.print_help()
	            sys.exit(1)

	        # Output results
	        if result["status"] == "success":
	            print(f"Analysis for: {result['filename']}")
	            print(f"Source: {result['audio_source']}")
	            print(f"Prompt: {result['prompt']}")
	            print("\n" + "=" * 50)
	            print(result["analysis"])
	        else:
	            print(f"Error: {result['error']}")
	            sys.exit(1)

	    except Exception as e:
	        # SystemExit is not an Exception subclass, so the sys.exit() calls
	        # above pass through this handler untouched.
	        print(f"Error: {e}")
	        sys.exit(1)