# Text2Sing-DiffSinger — music_generator.py
import numpy as np
import librosa
import soundfile as sf
from music21 import chord, note, stream, tempo, instrument
import random
import os
def generate_accompaniment(emotion, sentiment_score, tempo=100, output_path="accompaniment.wav"):
    """
    Generate a short musical accompaniment whose scale, chord progression,
    instrument, and dynamics are chosen from the detected emotion/sentiment.

    Args:
        emotion (str): Dominant emotion (Happy, Sad, Angry, Fear, Surprise)
        sentiment_score (float): Sentiment score from -1 (negative) to 1 (positive)
        tempo (int): Tempo in BPM
        output_path (str): Path to save the audio file
    Returns:
        str: Path to the generated audio file
    """
    # BUG FIX: the int parameter ``tempo`` shadows the ``music21.tempo`` module
    # imported at file level, so ``tempo.MetronomeMark(...)`` raised
    # AttributeError. Re-import the module under an alias the parameter
    # cannot shadow; the public signature stays unchanged.
    from music21 import tempo as m21_tempo

    # --- Choose scale and chord material based on emotion / sentiment polarity ---
    if emotion == "Happy" or sentiment_score > 0.3:
        # Major scales for happy emotions
        scales = [
            ['C4', 'D4', 'E4', 'F4', 'G4', 'A4', 'B4', 'C5'],  # C major
            ['G3', 'A3', 'B3', 'C4', 'D4', 'E4', 'F#4', 'G4']  # G major
        ]
        chord_progressions = [
            [['C4', 'E4', 'G4'], ['G3', 'B3', 'D4'], ['A3', 'C4', 'E4'], ['F3', 'A3', 'C4']],  # I-V-vi-IV
            [['C4', 'E4', 'G4'], ['F3', 'A3', 'C4'], ['G3', 'B3', 'D4'], ['C4', 'E4', 'G4']]   # I-IV-V-I
        ]
    elif emotion == "Sad" or sentiment_score < -0.3:
        # Minor scales for sad emotions
        scales = [
            ['A3', 'B3', 'C4', 'D4', 'E4', 'F4', 'G4', 'A4'],  # A minor
            ['D3', 'E3', 'F3', 'G3', 'A3', 'Bb3', 'C4', 'D4']  # D minor
        ]
        chord_progressions = [
            [['A3', 'C4', 'E4'], ['F3', 'A3', 'C4'], ['G3', 'B3', 'D4'], ['E3', 'G3', 'B3']],  # i-VI-VII-v
            [['A3', 'C4', 'E4'], ['D3', 'F3', 'A3'], ['F3', 'A3', 'C4'], ['E3', 'G3', 'B3']]   # i-iv-VI-V
        ]
    elif emotion == "Angry":
        # Diminished and altered scales for angry emotions
        scales = [
            ['E3', 'F3', 'G#3', 'A3', 'B3', 'C4', 'D#4', 'E4'],  # E phrygian dominant
            ['B3', 'C4', 'D4', 'E4', 'F4', 'G4', 'A4', 'B4']     # B locrian
        ]
        chord_progressions = [
            [['E3', 'G#3', 'B3'], ['A3', 'C4', 'E4'], ['F3', 'A3', 'C4'], ['B3', 'D4', 'F4']],
            [['E3', 'G#3', 'B3'], ['D3', 'F3', 'A3'], ['C3', 'E3', 'G3'], ['B2', 'D3', 'F3']]
        ]
    else:  # Fear, Surprise, or neutral
        # Modal scales for other emotions
        scales = [
            ['D3', 'E3', 'F3', 'G3', 'A3', 'B3', 'C4', 'D4'],   # D dorian
            ['E3', 'F#3', 'G3', 'A3', 'B3', 'C#4', 'D4', 'E4']  # E dorian
        ]
        chord_progressions = [
            [['D3', 'F3', 'A3'], ['C3', 'E3', 'G3'], ['Bb2', 'D3', 'F3'], ['A2', 'C3', 'E3']],
            [['E3', 'G3', 'B3'], ['A3', 'C4', 'E4'], ['D3', 'F#3', 'A3'], ['G3', 'B3', 'D4']]
        ]

    # Pick one scale and one progression at random for variety between calls.
    scale = random.choice(scales)
    progression = random.choice(chord_progressions)

    # --- Build the music21 stream: tempo, instrument, chords, melody ---
    s = stream.Stream()
    s.append(m21_tempo.MetronomeMark(number=tempo))

    # Instrument timbre chosen per emotion.
    if emotion == "Happy":
        inst = instrument.Piano()
    elif emotion == "Sad":
        inst = instrument.StringInstrument()
    elif emotion == "Angry":
        inst = instrument.ElectricGuitar()
    else:
        inst = instrument.Harp()
    s.append(inst)

    # 4 measures of the 4-chord progression, one quarter-note chord per beat.
    for _ in range(4):
        for chord_notes in progression:
            c = chord.Chord(chord_notes)
            c.quarterLength = 1.0
            s.append(c)

    # Melody line on a separate part, played by a flute.
    melody_part = stream.Part()
    melody_part.append(instrument.Flute())
    for _ in range(16):  # 16 beats
        if random.random() < 0.2:  # 20% chance of rest
            n = note.Rest()
        else:
            if emotion == "Happy":
                # Uniform choice over the scale -> more skips and jumps
                pitch = scale[random.randint(0, len(scale) - 1)]
            elif emotion == "Sad":
                # Normal distribution centred mid-scale -> more stepwise motion
                idx = min(max(0, int(np.random.normal(3, 1))), len(scale) - 1)
                pitch = scale[idx]
            else:
                pitch = random.choice(scale)
            n = note.Note(pitch)
        # BUG FIX: only Notes carry dynamics — a music21 Rest has no ``.volume``
        # attribute, so assigning a velocity to it raised AttributeError.
        if isinstance(n, note.Note):
            if emotion == "Angry":
                n.volume.velocity = 100  # Louder
            elif emotion == "Sad":
                n.volume.velocity = 60   # Softer
        # 30% chance of a half note, otherwise a quarter note.
        n.quarterLength = 2.0 if random.random() < 0.3 else 1.0
        melody_part.append(n)
    s.append(melody_part)

    # --- Export MIDI and render to audio ---
    midi_path = "temp_midi.mid"
    s.write('midi', fp=midi_path)
    try:
        # ROBUSTNESS: previously only ImportError was caught; a missing
        # fluidsynth binary or soundfont raises at conversion time instead,
        # which crashed the caller. Any failure now falls back to synthesis.
        from midi2audio import FluidSynth
        fs = FluidSynth()
        fs.midi_to_audio(midi_path, output_path)
        print(f"Musical accompaniment saved to {output_path}")
        return output_path
    except Exception:
        print("FluidSynth not available. Creating synthetic audio instead.")
        # Create synthetic audio as fallback
        return _generate_synthetic_audio(emotion, sentiment_score, tempo, output_path)
    finally:
        # Clean up the intermediate MIDI file on every path (success or fallback).
        if os.path.exists(midi_path):
            os.remove(midi_path)
def _generate_synthetic_audio(emotion, sentiment_score, tempo, output_path):
    """
    Fallback generator: render a 16-beat chord progression plus a simple
    melody as additive sine-wave audio with numpy, used when FluidSynth is
    not available.

    Args:
        emotion (str): Dominant emotion label (Happy, Sad, Angry, or other)
        sentiment_score (float): Sentiment in [-1, 1]; together with
            ``emotion`` selects the chord colour
        tempo (int): Tempo in BPM
        output_path (str): Destination audio file path
    Returns:
        str: ``output_path`` after the file has been written
    """
    spb = 60.0 / tempo            # seconds per beat
    sr = 22050                    # sample rate (Hz)
    duration = spb * 16           # 16 beats total
    total_samples = int(sr * duration)

    # Base triad frequencies chosen by emotion / sentiment polarity.
    if emotion == "Happy" or sentiment_score > 0.3:
        freqs = [261.63, 329.63, 392.00]  # C major: C4, E4, G4
    elif emotion == "Sad" or sentiment_score < -0.3:
        freqs = [220.00, 261.63, 329.63]  # A minor: A3, C4, E4
    elif emotion == "Angry":
        freqs = [246.94, 293.66, 349.23]  # B diminished: B3, D4, F4
    else:
        freqs = [293.66, 392.00, 440.00]  # D suspended: D4, G4, A4

    audio = np.zeros(total_samples)

    # --- Chord bed: 4 chords, 4 beats each, transposing the base triad ---
    for i in range(4):
        chord_start = int(i * 4 * spb * sr)
        chord_end = int((i + 1) * 4 * spb * sr)
        # Just-intonation ratios applied to the root triad.
        # (Comment fix: 5/4 is a major third, not a fourth.)
        if i == 0:
            freq_shift = 1.0          # root
        elif i == 1:
            freq_shift = 5.0 / 4.0    # major third above the root
        elif i == 2:
            freq_shift = 6.0 / 5.0    # minor third above the root
        else:
            freq_shift = 4.0 / 3.0    # perfect fourth above the root

        seg_len = chord_end - chord_start
        chord_audio = np.zeros(seg_len)
        # Time axis is identical for every tone in the chord: hoist it out
        # of the per-frequency loop.
        t = np.linspace(0, seg_len / sr, seg_len, False)
        for freq in freqs:
            # ``tone`` (not ``note``) so we do not shadow the file-level
            # ``music21.note`` import.
            tone = 0.2 * np.sin(2 * np.pi * (freq * freq_shift) * t)
            # Attack/release envelope. The original also computed an unused
            # ``decay`` length, removed here. Guard against zero-length
            # slices on very short segments (envelope[-0:] selects the
            # whole array and the assignment would raise).
            envelope = np.ones_like(t)
            attack = int(0.02 * len(t))   # 2% attack
            release = int(0.2 * len(t))   # 20% release
            if attack > 0:
                envelope[:attack] = np.linspace(0, 1, attack)
            if release > 0:
                envelope[-release:] = np.linspace(1, 0, release)
            chord_audio += tone * envelope

        # Per-chord peak normalization; guard against an all-zero segment.
        peak = np.max(np.abs(chord_audio))
        if peak > 0:
            chord_audio = chord_audio / peak
        audio[chord_start:chord_end] += chord_audio

    # --- Melody: note density per 4-beat measure depends on emotion ---
    melody_audio = np.zeros_like(audio)
    if emotion == "Happy":
        notes_per_measure = 4
    elif emotion == "Sad":
        notes_per_measure = 2
    else:
        notes_per_measure = 3
    # (The original's ``note_duration`` local was never used and is dropped;
    # note length is derived from the per-note slot below.)

    measure_samples = 4 * spb * sr
    slot = measure_samples / notes_per_measure
    for measure in range(4):
        for note_idx in range(notes_per_measure):
            start = int(measure * measure_samples + note_idx * slot)
            # 90% of the slot sounds, leaving a small gap between notes.
            end = min(start + int(0.9 * slot), len(melody_audio))
            # Melody pitch pool per emotion.
            if emotion == "Happy":
                freq = random.choice([392.00, 440.00, 493.88, 523.25])  # G4, A4, B4, C5
            elif emotion == "Sad":
                freq = random.choice([329.63, 349.23, 392.00, 440.00])  # E4, F4, G4, A4
            else:
                freq = random.choice([293.66, 329.63, 349.23, 392.00])  # D4, E4, F4, G4

            t = np.linspace(0, (end - start) / sr, end - start, False)
            # Fundamental plus two harmonics for a richer timbre.
            tone = 0.3 * np.sin(2 * np.pi * freq * t)
            tone += 0.15 * np.sin(2 * np.pi * freq * 2 * t)  # first harmonic
            tone += 0.05 * np.sin(2 * np.pi * freq * 3 * t)  # second harmonic

            envelope = np.ones_like(t)
            attack = int(0.1 * len(t))
            release = int(0.3 * len(t))
            if attack > 0:
                envelope[:attack] = np.linspace(0, 1, attack)
            if release > 0:
                envelope[-release:] = np.linspace(1, 0, release)
            melody_audio[start:end] += tone * envelope

    # --- Mix and write ---
    # Melody scaled to 60% of its peak, final mix to 90% full scale.
    # Zero-peak guards avoid a division-by-zero on silent signals.
    melody_peak = np.max(np.abs(melody_audio))
    if melody_peak > 0:
        melody_audio = 0.6 * melody_audio / melody_peak
    final_audio = audio + melody_audio
    final_peak = np.max(np.abs(final_audio))
    if final_peak > 0:
        final_audio = 0.9 * final_audio / final_peak

    sf.write(output_path, final_audio, sr)
    print(f"Synthetic musical accompaniment saved to {output_path}")
    return output_path