import json
import warnings

import librosa
import numpy as np

warnings.filterwarnings('ignore')

class MusicAnalyzer:
    def __init__(self):
        # Anchor points for each emotion class in valence/arousal space
        # (valence: how positive, arousal: how intense).
        self.emotion_classes = {
            'happy': {'valence': 0.96, 'arousal': 0.72},
            'excited': {'valence': 0.88, 'arousal': 0.96},
            'tender': {'valence': 0.70, 'arousal': 0.39},
            'calm': {'valence': 0.58, 'arousal': 0.18},
            'sad': {'valence': 0.18, 'arousal': 0.19},
            'depressed': {'valence': 0.09, 'arousal': 0.06},
            'angry': {'valence': 0.11, 'arousal': 0.80},
            'fearful': {'valence': 0.13, 'arousal': 0.99}
        }

        # Candidate themes, each keyed to the emotions that typically signal it.
        self.theme_classes = {
            'love': ['happy', 'excited', 'tender'],
            'triumph': ['excited', 'happy', 'angry'],
            'loss': ['sad', 'depressed'],
            'adventure': ['excited', 'fearful'],
            'reflection': ['calm', 'tender', 'sad'],
            'conflict': ['angry', 'fearful']
        }

        # Heuristic weights for mapping normalized audio features to
        # valence/arousal; mode dominates valence, tempo dominates arousal.
        self.feature_weights = {
            'mode': 0.34,
            'tempo': 0.32,
            'energy': 0.16,
            'brightness': 0.14,
            'rhythm_complexity': 0.04
        }
        self.key_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

    def load_audio(self, file_path, sr=22050, duration=None):
        """Load an audio file resampled to `sr`; returns (None, None) on failure."""
        try:
            y, sr = librosa.load(file_path, sr=sr, duration=duration)
            return y, sr
        except Exception as e:
            print(f"Error loading audio file: {e}")
            return None, None

    def analyze_rhythm(self, y, sr):
        """Estimate tempo, beat positions, and onset-based rhythm statistics."""
        onset_env = librosa.onset.onset_strength(y=y, sr=sr)
        tempo, beat_frames = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
        beat_times = librosa.frames_to_time(beat_frames, sr=sr)
        beat_intervals = np.diff(beat_times) if len(beat_times) > 1 else np.array([0])
        # Regularity: inverse of the spread in beat-to-beat intervals.
        beat_regularity = 1.0 / np.std(beat_intervals) if len(beat_intervals) > 0 and np.std(beat_intervals) > 0 else 0
        rhythm_intensity = np.mean(onset_env) / np.max(onset_env) if np.max(onset_env) > 0 else 0
        # Complexity: coefficient of variation of the onset envelope.
        rhythm_complexity = np.std(onset_env) / np.mean(onset_env) if np.mean(onset_env) > 0 else 0
        beat_times_list = [float(t) for t in beat_times.tolist()]
        beat_intervals_list = [float(i) for i in beat_intervals.tolist()]
        return {
            "tempo": float(tempo),
            "beat_times": beat_times_list,
            "beat_intervals": beat_intervals_list,
            "beat_regularity": float(beat_regularity),
            "rhythm_intensity": float(rhythm_intensity),
            "rhythm_complexity": float(rhythm_complexity)
        }

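    # NOTE: beat_regularity is unbounded (it grows as the beat spacing gets
    # steadier), while rhythm_intensity and rhythm_complexity are ratios; of
    # these statistics, only tempo and rhythm_complexity feed the
    # valence/arousal mapping further down.
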
    def analyze_tonality(self, y, sr):
        """Estimate key, mode, and tonal/timbral descriptors from chroma."""
        chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
        # Krumhansl-Schmuckler key profiles (tonic at index 0).
        major_profile = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
        minor_profile = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])
        chroma_avg = np.mean(chroma, axis=1)
        major_corr = np.zeros(12)
        minor_corr = np.zeros(12)
        for i in range(12):
            # Rotate the chroma so candidate tonic i lands at index 0 before
            # correlating; rolling by +i instead of -i would name the wrong key.
            major_corr[i] = np.corrcoef(np.roll(chroma_avg, -i), major_profile)[0, 1]
            minor_corr[i] = np.corrcoef(np.roll(chroma_avg, -i), minor_profile)[0, 1]
        max_major_idx = np.argmax(major_corr)
        max_minor_idx = np.argmax(minor_corr)
        if major_corr[max_major_idx] > minor_corr[max_minor_idx]:
            mode = "major"
            key = self.key_names[max_major_idx]
        else:
            mode = "minor"
            key = self.key_names[max_minor_idx]
        harmony_complexity = np.std(chroma) / np.mean(chroma) if np.mean(chroma) > 0 else 0
        tonal_stability = 1.0 / (np.std(chroma_avg) + 0.001)
        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
        # Brightness: spectral centroid as a fraction of the Nyquist frequency.
        brightness = np.mean(spectral_centroid) / (sr / 2)
        spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
        # Rough dissonance proxy: spectral contrast in the lowest sub-band.
        dissonance = np.mean(spectral_contrast[0])
        return {
            "key": key,
            "mode": mode,
            "is_major": mode == "major",
            "harmony_complexity": float(harmony_complexity),
            "tonal_stability": float(tonal_stability),
            "brightness": float(brightness),
            "dissonance": float(dissonance)
        }

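    # The profiles above are the Krumhansl-Schmuckler key profiles: each
    # rotation of the averaged chroma is correlated against the major and
    # minor templates, and the best-matching rotation names the key. As a
    # quick illustration (synthetic numbers, not part of the analyzer), a
    # chroma vector peaking on pitch classes C, E, and G correlates best
    # with the major profile at rotation 0, i.e. C major.
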
    def analyze_energy(self, y, sr):
        """Measure loudness statistics and the low/mid/high band balance."""
        rms = librosa.feature.rms(y=y)[0]
        mean_energy = np.mean(rms)
        energy_std = np.std(rms)
        energy_dynamic_range = np.max(rms) - np.min(rms) if len(rms) > 0 else 0
        spec = np.abs(librosa.stft(y))
        freq_bins = spec.shape[0]
        # Split the linear-frequency STFT bins into bottom 20%, middle 60%,
        # and top 20%.
        low_freq_energy = np.mean(spec[:int(freq_bins * 0.2), :])
        mid_freq_energy = np.mean(spec[int(freq_bins * 0.2):int(freq_bins * 0.8), :])
        high_freq_energy = np.mean(spec[int(freq_bins * 0.8):, :])
        total_energy = low_freq_energy + mid_freq_energy + high_freq_energy
        if total_energy > 0:
            low_freq_ratio = low_freq_energy / total_energy
            mid_freq_ratio = mid_freq_energy / total_energy
            high_freq_ratio = high_freq_energy / total_energy
        else:
            low_freq_ratio = mid_freq_ratio = high_freq_ratio = 1 / 3
        return {
            "mean_energy": float(mean_energy),
            "energy_std": float(energy_std),
            "energy_dynamic_range": float(energy_dynamic_range),
            "frequency_distribution": {
                "low_freq": float(low_freq_ratio),
                "mid_freq": float(mid_freq_ratio),
                "high_freq": float(high_freq_ratio)
            }
        }

    def feature_to_valence_arousal(self, features):
        """Map raw features onto the unit valence/arousal square."""
        # Normalize each feature to [0, 1] over a typical range
        # (70-170 BPM, RMS 0.08-0.5, brightness 0.25-0.7, complexity 0.1-0.8).
        tempo_norm = np.clip((features['tempo'] - 70) / (170 - 70), 0, 1)
        energy_norm = np.clip((features['energy'] - 0.08) / (0.5 - 0.08), 0, 1)
        brightness_norm = np.clip((features['brightness'] - 0.25) / (0.7 - 0.25), 0, 1)
        rhythm_complexity_norm = np.clip((features['rhythm_complexity'] - 0.1) / (0.8 - 0.1), 0, 1)

        valence = (
            self.feature_weights['mode'] * (1.0 if features['is_major'] else 0.0) +
            self.feature_weights['tempo'] * tempo_norm +
            self.feature_weights['energy'] * energy_norm +
            self.feature_weights['brightness'] * brightness_norm
        )
        arousal = (
            self.feature_weights['tempo'] * tempo_norm +
            self.feature_weights['energy'] * energy_norm +
            self.feature_weights['brightness'] * brightness_norm +
            self.feature_weights['rhythm_complexity'] * rhythm_complexity_norm
        )

        # Heuristic boost: bright, fast, major-key tracks read as
        # unambiguously positive and energetic.
        if features['is_major'] and features['tempo'] > 100 and features['brightness'] > 0.5:
            valence = max(valence, 0.85)
            arousal = max(arousal, 0.7)

        return float(np.clip(valence, 0, 1)), float(np.clip(arousal, 0, 1))

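    # Worked example (illustrative numbers only): a 140 BPM major-key track
    # with energy 0.30, brightness 0.55, and rhythm_complexity 0.40
    # normalizes to roughly (0.70, 0.52, 0.67, 0.43), giving valence ~0.74
    # and arousal ~0.42; the bright-major-fast boost then lifts this to
    # (0.85, 0.70), which analyze_emotion() below classifies as 'happy'.
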
    def analyze_emotion(self, rhythm_data, tonal_data, energy_data):
        """Classify emotion by the nearest anchor in valence/arousal space."""
        features = {
            'tempo': rhythm_data['tempo'],
            'energy': energy_data['mean_energy'],
            'is_major': tonal_data['is_major'],
            'brightness': tonal_data['brightness'],
            'rhythm_complexity': rhythm_data['rhythm_complexity']
        }
        valence, arousal = self.feature_to_valence_arousal(features)
        emotion_scores = {}
        for emotion, va in self.emotion_classes.items():
            dist = np.sqrt((valence - va['valence']) ** 2 + (arousal - va['arousal']) ** 2)
            emotion_scores[emotion] = 1.0 - dist
        primary_emotion = max(emotion_scores.items(), key=lambda x: x[1])
        sorted_emotions = sorted(emotion_scores.items(), key=lambda x: x[1], reverse=True)
        secondary_emotion = sorted_emotions[1][0] if len(sorted_emotions) > 1 else None
        return {
            "primary_emotion": primary_emotion[0],
            "confidence": float(primary_emotion[1]),
            "emotion_scores": {k: float(v) for k, v in emotion_scores.items()},
            "valence": valence,
            "arousal": arousal,
            "secondary_emotion": secondary_emotion
        }

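    # NOTE: each score is 1 minus the Euclidean distance to the class anchor,
    # so a score can dip below zero (the largest possible distance in the
    # unit square is sqrt(2) ~ 1.41); the ranking, and hence the winning
    # emotion, is unaffected.
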
    def analyze_theme(self, rhythm_data, tonal_data, emotion_data):
        """Score themes from the detected emotions and harmonic complexity."""
        primary_emotion = emotion_data['primary_emotion']
        secondary_emotion = emotion_data.get('secondary_emotion')
        harmony_complexity = tonal_data.get('harmony_complexity', 0.5)
        theme_scores = {}
        for theme, emotions in self.theme_classes.items():
            score = 0.0
            if primary_emotion in emotions:
                score += 0.7
            if secondary_emotion in emotions:
                score += 0.3
            # Complex harmony nudges tense themes; simple harmony nudges
            # gentle ones.
            if theme in ['adventure', 'conflict']:
                score += 0.3 * np.clip((harmony_complexity - 0.4) / 0.6, 0, 1)
            elif theme in ['love', 'reflection']:
                score += 0.3 * np.clip((0.6 - harmony_complexity) / 0.6, 0, 1)
            theme_scores[theme] = float(np.clip(score, 0, 1))
        primary_theme = max(theme_scores.items(), key=lambda x: x[1])
        secondary_themes = [k for k, v in sorted(theme_scores.items(), key=lambda x: x[1], reverse=True)
                            if k != primary_theme[0] and v > 0.5]
        return {
            "primary_theme": primary_theme[0],
            "confidence": primary_theme[1],
            "secondary_themes": secondary_themes[:2],
            "theme_scores": theme_scores
        }

    def analyze_music(self, file_path):
        """Run the full pipeline on one file and return a JSON-friendly dict."""
        y, sr = self.load_audio(file_path)
        if y is None:
            return {"error": "Failed to load audio file"}
        rhythm_data = self.analyze_rhythm(y, sr)
        tonal_data = self.analyze_tonality(y, sr)
        energy_data = self.analyze_energy(y, sr)
        emotion_data = self.analyze_emotion(rhythm_data, tonal_data, energy_data)
        theme_data = self.analyze_theme(rhythm_data, tonal_data, emotion_data)

        def convert_numpy_to_python(obj):
            # Recursively replace numpy arrays/scalars so the result can be
            # serialized with json.dumps.
            if isinstance(obj, dict):
                return {k: convert_numpy_to_python(v) for k, v in obj.items()}
            elif isinstance(obj, list):
                return [convert_numpy_to_python(item) for item in obj]
            elif isinstance(obj, np.ndarray):
                return obj.tolist()
            elif isinstance(obj, np.number):
                return float(obj)
            else:
                return obj

        rhythm_data = convert_numpy_to_python(rhythm_data)
        tonal_data = convert_numpy_to_python(tonal_data)
        energy_data = convert_numpy_to_python(energy_data)
        emotion_data = convert_numpy_to_python(emotion_data)
        theme_data = convert_numpy_to_python(theme_data)
        return {
            "file": file_path,
            "rhythm_analysis": rhythm_data,
            "tonal_analysis": tonal_data,
            "energy_analysis": energy_data,
            "emotion_analysis": emotion_data,
            "theme_analysis": theme_data,
            "summary": {
                "tempo": float(rhythm_data["tempo"]),
                "primary_emotion": emotion_data["primary_emotion"],
                "primary_theme": theme_data["primary_theme"]
            }
        }

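# A minimal batch sketch (hypothetical helper, not part of the original
# script): run the analyzer over a folder of audio files and tally the
# primary emotions. Assumes the files are readable by librosa.
def summarize_folder(folder, pattern="*.mp3"):
    from collections import Counter
    from pathlib import Path

    batch_analyzer = MusicAnalyzer()
    counts = Counter()
    for path in sorted(Path(folder).glob(pattern)):
        result = batch_analyzer.analyze_music(str(path))
        if "error" not in result:
            counts[result["summary"]["primary_emotion"]] += 1
    return counts
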

analyzer = MusicAnalyzer()

if __name__ == "__main__":
    # Point this at a real audio file before running.
    demo_file = "path/to/your/audio/file.mp3"

    results = analyzer.analyze_music(demo_file)

    # Guard against a failed load, which returns an error dict
    # with no "summary" key.
    if "error" in results:
        print(results["error"])
    else:
        print("\n=== MUSIC ANALYSIS SUMMARY ===")
        print(f"Tempo: {results['summary']['tempo']:.1f} BPM")
        print(f"Primary Emotion: {results['summary']['primary_emotion']}")
        print(f"Primary Theme: {results['summary']['primary_theme']}")

        print("\n=== DETAILED ANALYSIS ===")
        print(json.dumps(results, indent=2))