# Text2Sing-DiffSinger — music_generator.py
import numpy as np
import librosa
import soundfile as sf
from music21 import chord, note, stream, tempo, instrument
import random
import os
def generate_accompaniment(emotion, sentiment_score, tempo=100, output_path="accompaniment.wav"):
    """
    Generate a short musical accompaniment whose scale, chord progression,
    instrument, and dynamics are chosen from the detected emotion/sentiment.

    Args:
        emotion (str): Dominant emotion (Happy, Sad, Angry, Fear, Surprise)
        sentiment_score (float): Sentiment score from -1 (negative) to 1 (positive)
        tempo (int): Tempo in BPM
        output_path (str): Path to save the audio file
    Returns:
        str: Path to the generated audio file
    """
    # BUG FIX: the int parameter ``tempo`` shadows the ``music21.tempo`` module
    # imported at file level, so ``tempo.MetronomeMark(...)`` raised
    # AttributeError. Re-import the module under an alias the parameter
    # cannot shadow; the public signature stays unchanged.
    from music21 import tempo as m21_tempo

    # --- Choose scale and chord material based on emotion / sentiment polarity ---
    if emotion == "Happy" or sentiment_score > 0.3:
        # Major scales for happy emotions
        scales = [
            ['C4', 'D4', 'E4', 'F4', 'G4', 'A4', 'B4', 'C5'],  # C major
            ['G3', 'A3', 'B3', 'C4', 'D4', 'E4', 'F#4', 'G4']  # G major
        ]
        chord_progressions = [
            [['C4', 'E4', 'G4'], ['G3', 'B3', 'D4'], ['A3', 'C4', 'E4'], ['F3', 'A3', 'C4']],  # I-V-vi-IV
            [['C4', 'E4', 'G4'], ['F3', 'A3', 'C4'], ['G3', 'B3', 'D4'], ['C4', 'E4', 'G4']]   # I-IV-V-I
        ]
    elif emotion == "Sad" or sentiment_score < -0.3:
        # Minor scales for sad emotions
        scales = [
            ['A3', 'B3', 'C4', 'D4', 'E4', 'F4', 'G4', 'A4'],  # A minor
            ['D3', 'E3', 'F3', 'G3', 'A3', 'Bb3', 'C4', 'D4']  # D minor
        ]
        chord_progressions = [
            [['A3', 'C4', 'E4'], ['F3', 'A3', 'C4'], ['G3', 'B3', 'D4'], ['E3', 'G3', 'B3']],  # i-VI-VII-v
            [['A3', 'C4', 'E4'], ['D3', 'F3', 'A3'], ['F3', 'A3', 'C4'], ['E3', 'G3', 'B3']]   # i-iv-VI-V
        ]
    elif emotion == "Angry":
        # Diminished and altered scales for angry emotions
        scales = [
            ['E3', 'F3', 'G#3', 'A3', 'B3', 'C4', 'D#4', 'E4'],  # E phrygian dominant
            ['B3', 'C4', 'D4', 'E4', 'F4', 'G4', 'A4', 'B4']     # B locrian
        ]
        chord_progressions = [
            [['E3', 'G#3', 'B3'], ['A3', 'C4', 'E4'], ['F3', 'A3', 'C4'], ['B3', 'D4', 'F4']],
            [['E3', 'G#3', 'B3'], ['D3', 'F3', 'A3'], ['C3', 'E3', 'G3'], ['B2', 'D3', 'F3']]
        ]
    else:  # Fear, Surprise, or neutral
        # Modal scales for other emotions
        scales = [
            ['D3', 'E3', 'F3', 'G3', 'A3', 'B3', 'C4', 'D4'],   # D dorian
            ['E3', 'F#3', 'G3', 'A3', 'B3', 'C#4', 'D4', 'E4']  # E dorian
        ]
        chord_progressions = [
            [['D3', 'F3', 'A3'], ['C3', 'E3', 'G3'], ['Bb2', 'D3', 'F3'], ['A2', 'C3', 'E3']],
            [['E3', 'G3', 'B3'], ['A3', 'C4', 'E4'], ['D3', 'F#3', 'A3'], ['G3', 'B3', 'D4']]
        ]

    # Pick one scale and one progression at random for variety between calls.
    scale = random.choice(scales)
    progression = random.choice(chord_progressions)

    # --- Build the music21 stream: tempo, instrument, chords, melody ---
    s = stream.Stream()
    s.append(m21_tempo.MetronomeMark(number=tempo))

    # Instrument timbre chosen per emotion.
    if emotion == "Happy":
        inst = instrument.Piano()
    elif emotion == "Sad":
        inst = instrument.StringInstrument()
    elif emotion == "Angry":
        inst = instrument.ElectricGuitar()
    else:
        inst = instrument.Harp()
    s.append(inst)

    # 4 measures of the 4-chord progression, one quarter-note chord per beat.
    for _ in range(4):
        for chord_notes in progression:
            c = chord.Chord(chord_notes)
            c.quarterLength = 1.0
            s.append(c)

    # Melody line on a separate part, played by a flute.
    melody_part = stream.Part()
    melody_part.append(instrument.Flute())
    for _ in range(16):  # 16 beats
        if random.random() < 0.2:  # 20% chance of rest
            n = note.Rest()
        else:
            if emotion == "Happy":
                # Uniform choice over the scale -> more skips and jumps
                pitch = scale[random.randint(0, len(scale) - 1)]
            elif emotion == "Sad":
                # Normal distribution centred mid-scale -> more stepwise motion
                idx = min(max(0, int(np.random.normal(3, 1))), len(scale) - 1)
                pitch = scale[idx]
            else:
                pitch = random.choice(scale)
            n = note.Note(pitch)
        # BUG FIX: only Notes carry dynamics — a music21 Rest has no ``.volume``
        # attribute, so assigning a velocity to it raised AttributeError.
        if isinstance(n, note.Note):
            if emotion == "Angry":
                n.volume.velocity = 100  # Louder
            elif emotion == "Sad":
                n.volume.velocity = 60   # Softer
        # 30% chance of a half note, otherwise a quarter note.
        n.quarterLength = 2.0 if random.random() < 0.3 else 1.0
        melody_part.append(n)
    s.append(melody_part)

    # --- Export MIDI and render to audio ---
    midi_path = "temp_midi.mid"
    s.write('midi', fp=midi_path)
    try:
        # ROBUSTNESS: previously only ImportError was caught; a missing
        # fluidsynth binary or soundfont raises at conversion time instead,
        # which crashed the caller. Any failure now falls back to synthesis.
        from midi2audio import FluidSynth
        fs = FluidSynth()
        fs.midi_to_audio(midi_path, output_path)
        print(f"Musical accompaniment saved to {output_path}")
        return output_path
    except Exception:
        print("FluidSynth not available. Creating synthetic audio instead.")
        # Create synthetic audio as fallback
        return _generate_synthetic_audio(emotion, sentiment_score, tempo, output_path)
    finally:
        # Clean up the intermediate MIDI file on every path (success or fallback).
        if os.path.exists(midi_path):
            os.remove(midi_path)
def _generate_synthetic_audio(emotion, sentiment_score, tempo, output_path):
    """
    Fallback generator: render a 16-beat chord progression plus a simple
    melody as additive sine-wave audio with numpy, used when FluidSynth is
    not available.

    Args:
        emotion (str): Dominant emotion label (Happy, Sad, Angry, or other)
        sentiment_score (float): Sentiment in [-1, 1]; together with
            ``emotion`` selects the chord colour
        tempo (int): Tempo in BPM
        output_path (str): Destination audio file path
    Returns:
        str: ``output_path`` after the file has been written
    """
    spb = 60.0 / tempo            # seconds per beat
    sr = 22050                    # sample rate (Hz)
    duration = spb * 16           # 16 beats total
    total_samples = int(sr * duration)

    # Base triad frequencies chosen by emotion / sentiment polarity.
    if emotion == "Happy" or sentiment_score > 0.3:
        freqs = [261.63, 329.63, 392.00]  # C major: C4, E4, G4
    elif emotion == "Sad" or sentiment_score < -0.3:
        freqs = [220.00, 261.63, 329.63]  # A minor: A3, C4, E4
    elif emotion == "Angry":
        freqs = [246.94, 293.66, 349.23]  # B diminished: B3, D4, F4
    else:
        freqs = [293.66, 392.00, 440.00]  # D suspended: D4, G4, A4

    audio = np.zeros(total_samples)

    # --- Chord bed: 4 chords, 4 beats each, transposing the base triad ---
    for i in range(4):
        chord_start = int(i * 4 * spb * sr)
        chord_end = int((i + 1) * 4 * spb * sr)
        # Just-intonation ratios applied to the root triad.
        # (Comment fix: 5/4 is a major third, not a fourth.)
        if i == 0:
            freq_shift = 1.0          # root
        elif i == 1:
            freq_shift = 5.0 / 4.0    # major third above the root
        elif i == 2:
            freq_shift = 6.0 / 5.0    # minor third above the root
        else:
            freq_shift = 4.0 / 3.0    # perfect fourth above the root

        seg_len = chord_end - chord_start
        chord_audio = np.zeros(seg_len)
        # Time axis is identical for every tone in the chord: hoist it out
        # of the per-frequency loop.
        t = np.linspace(0, seg_len / sr, seg_len, False)
        for freq in freqs:
            # ``tone`` (not ``note``) so we do not shadow the file-level
            # ``music21.note`` import.
            tone = 0.2 * np.sin(2 * np.pi * (freq * freq_shift) * t)
            # Attack/release envelope. The original also computed an unused
            # ``decay`` length, removed here. Guard against zero-length
            # slices on very short segments (envelope[-0:] selects the
            # whole array and the assignment would raise).
            envelope = np.ones_like(t)
            attack = int(0.02 * len(t))   # 2% attack
            release = int(0.2 * len(t))   # 20% release
            if attack > 0:
                envelope[:attack] = np.linspace(0, 1, attack)
            if release > 0:
                envelope[-release:] = np.linspace(1, 0, release)
            chord_audio += tone * envelope

        # Per-chord peak normalization; guard against an all-zero segment.
        peak = np.max(np.abs(chord_audio))
        if peak > 0:
            chord_audio = chord_audio / peak
        audio[chord_start:chord_end] += chord_audio

    # --- Melody: note density per 4-beat measure depends on emotion ---
    melody_audio = np.zeros_like(audio)
    if emotion == "Happy":
        notes_per_measure = 4
    elif emotion == "Sad":
        notes_per_measure = 2
    else:
        notes_per_measure = 3
    # (The original's ``note_duration`` local was never used and is dropped;
    # note length is derived from the per-note slot below.)

    measure_samples = 4 * spb * sr
    slot = measure_samples / notes_per_measure
    for measure in range(4):
        for note_idx in range(notes_per_measure):
            start = int(measure * measure_samples + note_idx * slot)
            # 90% of the slot sounds, leaving a small gap between notes.
            end = min(start + int(0.9 * slot), len(melody_audio))
            # Melody pitch pool per emotion.
            if emotion == "Happy":
                freq = random.choice([392.00, 440.00, 493.88, 523.25])  # G4, A4, B4, C5
            elif emotion == "Sad":
                freq = random.choice([329.63, 349.23, 392.00, 440.00])  # E4, F4, G4, A4
            else:
                freq = random.choice([293.66, 329.63, 349.23, 392.00])  # D4, E4, F4, G4

            t = np.linspace(0, (end - start) / sr, end - start, False)
            # Fundamental plus two harmonics for a richer timbre.
            tone = 0.3 * np.sin(2 * np.pi * freq * t)
            tone += 0.15 * np.sin(2 * np.pi * freq * 2 * t)  # first harmonic
            tone += 0.05 * np.sin(2 * np.pi * freq * 3 * t)  # second harmonic

            envelope = np.ones_like(t)
            attack = int(0.1 * len(t))
            release = int(0.3 * len(t))
            if attack > 0:
                envelope[:attack] = np.linspace(0, 1, attack)
            if release > 0:
                envelope[-release:] = np.linspace(1, 0, release)
            melody_audio[start:end] += tone * envelope

    # --- Mix and write ---
    # Melody scaled to 60% of its peak, final mix to 90% full scale.
    # Zero-peak guards avoid a division-by-zero on silent signals.
    melody_peak = np.max(np.abs(melody_audio))
    if melody_peak > 0:
        melody_audio = 0.6 * melody_audio / melody_peak
    final_audio = audio + melody_audio
    final_peak = np.max(np.abs(final_audio))
    if final_peak > 0:
        final_audio = 0.9 * final_audio / final_peak

    sf.write(output_path, final_audio, sr)
    print(f"Synthetic musical accompaniment saved to {output_path}")
    return output_path