Vaishnavi0404 commited on
Commit
0c413ab
·
verified ·
1 Parent(s): 5667d64

Update music_generator.py

Browse files
Files changed (1) hide show
  1. music_generator.py +135 -280
music_generator.py CHANGED
@@ -1,308 +1,163 @@
 
1
  import numpy as np
 
2
  import librosa
3
  import soundfile as sf
4
- import music21
5
- from music21 import chord, note, stream, instrument
6
- import random
7
- import os
8
- from music21 import tempo
9
 
10
- def generate_accompaniment(emotion, sentiment_score, tempo=100, output_path="accompaniment.wav"):
11
- """
12
- Generate musical accompaniment based on emotion and sentiment
13
-
14
- Args:
15
- emotion (str): Dominant emotion (Happy, Sad, Angry, Fear, Surprise)
16
- sentiment_score (float): Sentiment score from -1 (negative) to 1 (positive)
17
- tempo (int): Tempo in BPM
18
- output_path (str): Path to save the audio file
19
-
20
- Returns:
21
- str: Path to the generated audio file
22
- """
23
- # Choose scales and chords based on emotion
24
- if emotion == "Happy" or sentiment_score > 0.3:
25
- # Major scales for happy emotions
26
- scales = [
27
- ['C4', 'D4', 'E4', 'F4', 'G4', 'A4', 'B4', 'C5'], # C major
28
- ['G3', 'A3', 'B3', 'C4', 'D4', 'E4', 'F#4', 'G4'] # G major
29
- ]
30
- chord_progressions = [
31
- [['C4', 'E4', 'G4'], ['G3', 'B3', 'D4'], ['A3', 'C4', 'E4'], ['F3', 'A3', 'C4']], # I-V-vi-IV
32
- [['C4', 'E4', 'G4'], ['F3', 'A3', 'C4'], ['G3', 'B3', 'D4'], ['C4', 'E4', 'G4']] # I-IV-V-I
33
- ]
34
-
35
- elif emotion == "Sad" or sentiment_score < -0.3:
36
- # Minor scales for sad emotions
37
- scales = [
38
- ['A3', 'B3', 'C4', 'D4', 'E4', 'F4', 'G4', 'A4'], # A minor
39
- ['D3', 'E3', 'F3', 'G3', 'A3', 'Bb3', 'C4', 'D4'] # D minor
40
- ]
41
- chord_progressions = [
42
- [['A3', 'C4', 'E4'], ['F3', 'A3', 'C4'], ['G3', 'B3', 'D4'], ['E3', 'G3', 'B3']], # i-VI-VII-v
43
- [['A3', 'C4', 'E4'], ['D3', 'F3', 'A3'], ['F3', 'A3', 'C4'], ['E3', 'G3', 'B3']] # i-iv-VI-V
44
- ]
45
-
46
- elif emotion == "Angry":
47
- # Diminished and altered scales for angry emotions
48
- scales = [
49
- ['E3', 'F3', 'G#3', 'A3', 'B3', 'C4', 'D#4', 'E4'], # E phrygian dominant
50
- ['B3', 'C4', 'D4', 'E4', 'F4', 'G4', 'A4', 'B4'] # B locrian
51
- ]
52
- chord_progressions = [
53
- [['E3', 'G#3', 'B3'], ['A3', 'C4', 'E4'], ['F3', 'A3', 'C4'], ['B3', 'D4', 'F4']],
54
- [['E3', 'G#3', 'B3'], ['D3', 'F3', 'A3'], ['C3', 'E3', 'G3'], ['B2', 'D3', 'F3']]
55
- ]
56
 
57
- else: # Fear, Surprise, or neutral
58
- # Modal scales for other emotions
59
- scales = [
60
- ['D3', 'E3', 'F3', 'G3', 'A3', 'B3', 'C4', 'D4'], # D dorian
61
- ['E3', 'F#3', 'G3', 'A3', 'B3', 'C#4', 'D4', 'E4'] # E dorian
62
- ]
63
- chord_progressions = [
64
- [['D3', 'F3', 'A3'], ['C3', 'E3', 'G3'], ['Bb2', 'D3', 'F3'], ['A2', 'C3', 'E3']],
65
- [['E3', 'G3', 'B3'], ['A3', 'C4', 'E4'], ['D3', 'F#3', 'A3'], ['G3', 'B3', 'D4']]
66
- ]
67
-
68
- # Choose a scale and chord progression randomly
69
- scale = random.choice(scales)
70
- progression = random.choice(chord_progressions)
71
-
72
- # Create a music21 stream
73
- s = stream.Stream()
74
-
75
- # Set tempo
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  t = music21.tempo.MetronomeMark(number=tempo_value)
77
- s.append(t)
78
 
79
- # Set instrument based on emotion
80
- if emotion == "Happy":
81
- inst = instrument.Piano()
82
- elif emotion == "Sad":
83
- inst = instrument.StringInstrument()
84
- elif emotion == "Angry":
85
- inst = instrument.ElectricGuitar()
86
- else:
87
- inst = instrument.Harp()
88
-
89
- s.append(inst)
90
 
91
- # Generate a simple chord progression
92
- for i in range(4): # 4 measures
93
- for chord_notes in progression:
94
- # Create chord
95
- c = chord.Chord(chord_notes)
96
- c.quarterLength = 1.0 # Quarter note duration
97
- s.append(c)
98
 
99
- # Add a simple melody using the scale
100
- melody_part = stream.Part()
101
- melody_part.append(instrument.Flute())
102
 
103
- # Generate melody based on emotion
104
- for i in range(16): # 16 beats
105
- if random.random() < 0.2: # 20% chance of rest
106
- n = note.Rest()
107
- else:
108
- # Choose note from scale
109
- if emotion == "Happy":
110
- # More skips and jumps for happy
111
- pitch = scale[random.randint(0, len(scale)-1)]
112
- elif emotion == "Sad":
113
- # More stepwise motion for sad
114
- idx = min(max(0, int(np.random.normal(3, 1))), len(scale)-1)
115
- pitch = scale[idx]
116
- else:
117
- pitch = random.choice(scale)
118
-
119
- n = note.Note(pitch)
120
-
121
- # Add articulation based on emotion
122
- if emotion == "Angry":
123
- n.volume.velocity = 100 # Louder
124
- elif emotion == "Sad":
125
- n.volume.velocity = 60 # Softer
126
-
127
- # Set duration
128
- if random.random() < 0.3: # 30% chance of half note
129
- n.quarterLength = 2.0
130
- else:
131
- n.quarterLength = 1.0
132
-
133
  melody_part.append(n)
134
 
135
- s.append(melody_part)
 
136
 
137
- # Export to MIDI
138
- midi_path = "temp_midi.mid"
139
- s.write('midi', fp=midi_path)
 
 
 
 
 
 
140
 
141
- # Convert MIDI to audio using fluidsynth (if available)
142
- try:
143
- from midi2audio import FluidSynth
144
- fs = FluidSynth()
145
- fs.midi_to_audio(midi_path, output_path)
146
-
147
- print(f"Musical accompaniment saved to {output_path}")
148
-
149
- # Clean up midi file
150
- if os.path.exists(midi_path):
151
- os.remove(midi_path)
152
-
153
- return output_path
154
-
155
- except ImportError:
156
- print("FluidSynth not available. Creating synthetic audio instead.")
157
- # Create synthetic audio as fallback
158
- return _generate_synthetic_audio(emotion, sentiment_score, tempo, output_path)
159
-
160
- def _generate_synthetic_audio(emotion, sentiment_score, tempo, output_path):
161
- """Generate synthetic audio using numpy when FluidSynth is not available"""
162
- # Convert tempo to seconds per beat
163
- spb = 60.0 / tempo
164
 
165
- # Sample rate
166
- sr = 22050
167
-
168
- # Duration in seconds (16 beats)
169
- duration = spb * 16
170
-
171
- # Total samples
172
- total_samples = int(sr * duration)
173
-
174
- # Frequencies based on emotion
175
- if emotion == "Happy" or sentiment_score > 0.3:
176
- # Major chord frequencies (C major: C, E, G)
177
- freqs = [261.63, 329.63, 392.00]
178
- elif emotion == "Sad" or sentiment_score < -0.3:
179
- # Minor chord frequencies (A minor: A, C, E)
180
- freqs = [220.00, 261.63, 329.63]
181
- elif emotion == "Angry":
182
- # Diminished chord (B diminished: B, D, F)
183
- freqs = [246.94, 293.66, 349.23]
184
  else:
185
- # Suspended chord (D suspended: D, G, A)
186
- freqs = [293.66, 392.00, 440.00]
187
-
188
- # Generate a simple chord progression
189
- audio = np.zeros(total_samples)
190
-
191
- # Create 4 chords, each for 4 beats
192
- for i in range(4):
193
- chord_start = int(i * 4 * spb * sr)
194
- chord_end = int((i + 1) * 4 * spb * sr)
195
 
196
- # Shift base frequencies based on chord position
197
- if i == 0:
198
- freq_shift = 1.0 # Root
199
- elif i == 1:
200
- freq_shift = 5.0/4.0 # Fourth up
201
- elif i == 2:
202
- freq_shift = 6.0/5.0 # Minor third up from previous
203
  else:
204
- freq_shift = 4.0/3.0 # Perfect fourth up from root
205
-
206
- # Create chord tones
207
- chord_audio = np.zeros(chord_end - chord_start)
208
-
209
- for freq in freqs:
210
- # Create a time array for this segment
211
- t = np.linspace(0, (chord_end - chord_start) / sr, chord_end - chord_start, False)
212
-
213
- # Adjust frequency based on chord position
214
- adjusted_freq = freq * freq_shift
215
-
216
- # Generate sine wave
217
- note = 0.2 * np.sin(2 * np.pi * adjusted_freq * t)
218
-
219
- # Apply envelope
220
- envelope = np.ones_like(t)
221
- attack = int(0.02 * len(t)) # 2% attack
222
- decay = int(0.1 * len(t)) # 10% decay
223
- release = int(0.2 * len(t)) # 20% release
224
-
225
- envelope[:attack] = np.linspace(0, 1, attack)
226
- envelope[-release:] = np.linspace(1, 0, release)
227
-
228
- # Apply envelope
229
- note = note * envelope
230
-
231
- # Add to chord
232
- chord_audio += note
233
-
234
- # Normalize chord
235
- chord_audio = chord_audio / np.max(np.abs(chord_audio))
236
-
237
- # Add to full audio
238
- audio[chord_start:chord_end] += chord_audio
239
 
240
- # Add a simple melody
241
- melody_audio = np.zeros_like(audio)
 
 
242
 
243
- # Generate a few melody notes based on emotion
244
- note_duration = int(0.5 * spb * sr) # Eighth notes
245
 
246
- if emotion == "Happy":
247
- notes_per_measure = 4
248
- elif emotion == "Sad":
249
- notes_per_measure = 2
250
- else:
251
- notes_per_measure = 3
252
 
253
- for measure in range(4):
254
- for note_idx in range(notes_per_measure):
255
- # Calculate start time for this note
256
- start = measure * 4 * spb * sr + note_idx * (4 * spb * sr / notes_per_measure)
257
- start = int(start)
258
-
259
- # Note duration (with a small gap between notes)
260
- end = start + int(0.9 * (4 * spb * sr / notes_per_measure))
261
-
262
- if end > len(melody_audio):
263
- end = len(melody_audio)
264
-
265
- # Choose a frequency based on emotion
266
- if emotion == "Happy":
267
- freq = random.choice([392.00, 440.00, 493.88, 523.25]) # G4, A4, B4, C5
268
- elif emotion == "Sad":
269
- freq = random.choice([329.63, 349.23, 392.00, 440.00]) # E4, F4, G4, A4
270
- else:
271
- freq = random.choice([293.66, 329.63, 349.23, 392.00]) # D4, E4, F4, G4
272
-
273
- # Create time array for this note
274
- t = np.linspace(0, (end - start) / sr, end - start, False)
275
-
276
- # Generate sine wave with some harmonics for richness
277
- note_audio = 0.3 * np.sin(2 * np.pi * freq * t)
278
- note_audio += 0.15 * np.sin(2 * np.pi * freq * 2 * t) # First harmonic
279
- note_audio += 0.05 * np.sin(2 * np.pi * freq * 3 * t) # Second harmonic
280
-
281
- # Apply envelope
282
- envelope = np.ones_like(t)
283
- attack = int(0.1 * len(t))
284
- release = int(0.3 * len(t))
285
-
286
- envelope[:attack] = np.linspace(0, 1, attack)
287
- envelope[-release:] = np.linspace(1, 0, release)
288
-
289
- note_audio = note_audio * envelope
290
-
291
- # Add to melody
292
- melody_audio[start:end] += note_audio
293
 
294
- # Normalize melody
295
- melody_audio = 0.6 * melody_audio / np.max(np.abs(melody_audio))
 
 
 
 
 
 
 
296
 
297
- # Mix chord progression and melody
298
- final_audio = audio + melody_audio
299
 
300
- # Final normalization
301
- final_audio = 0.9 * final_audio / np.max(np.abs(final_audio))
302
 
303
- # Save audio file
304
- sf.write(output_path, final_audio, sr)
305
 
306
- print(f"Synthetic musical accompaniment saved to {output_path}")
 
307
 
308
- return output_path
 
1
+ import os
2
  import numpy as np
3
+ import torch
4
  import librosa
5
  import soundfile as sf
6
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
7
+ import music21 # Added this import to fix the NameError
 
 
 
8
 
9
+ # Assuming you're using other modules for text-to-singing functionality
10
+ # Add any other imports you need here
11
+
12
class EmotionDetector:
    """Detect a coarse emotion label and an intensity score from text.

    Uses the ``nlptown/bert-base-multilingual-uncased-sentiment`` model,
    a 5-class star-rating classifier (1 = very negative ... 5 = very positive),
    and maps the expected rating onto {"Happy", "Sad", "Neutral"}.
    """

    def __init__(self):
        self.tokenizer = AutoTokenizer.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
        self.model = AutoModelForSequenceClassification.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
        # Inference only: switch off dropout etc. for deterministic scores.
        self.model.eval()

    def detect_emotion(self, text):
        """Return ``(emotion, intensity)`` for *text*.

        Args:
            text (str): Input text; truncated to 512 tokens.

        Returns:
            tuple[str, float]: emotion in {"Happy", "Sad", "Neutral"} and an
            intensity in [0, 1] (0.0 for "Neutral").
        """
        inputs = self.tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
        # We never backpropagate here, so skip building the autograd graph.
        with torch.no_grad():
            outputs = self.model(**inputs)
        sentiment_score = outputs.logits[0].softmax(dim=0)
        # Expected star rating (1..5) normalised into (0, 1].
        sentiment_value = (sentiment_score * torch.tensor([1, 2, 3, 4, 5])).sum().item() / 5.0

        # Map the normalised rating onto a coarse emotion plus intensity.
        if sentiment_value > 0.7:
            return "Happy", sentiment_value * 2 - 1
        elif sentiment_value < 0.3:
            return "Sad", 1 - sentiment_value * 2
        else:
            return "Neutral", 0.0
31
+
32
+ # Function to synthesize speech from text
33
def synthesize_speech(text, output_path="temp_speech.wav"):
    """Placeholder text-to-speech hook.

    Args:
        text (str): Text to speak (currently unused by the stub).
        output_path (str): Where the speech audio would be written.

    Returns:
        str: *output_path*, so callers can chain on the file location.
    """
    # TODO: plug in the real TTS engine here; it must write to output_path.
    print(f"Speech synthesized and saved to {output_path}")
    return output_path
40
+
41
+ # Function to convert speech to singing
42
def convert_to_singing(speech_path, emotion="Neutral", emotion_intensity=0.5, output_path="temp_singing.wav"):
    """Placeholder speech-to-singing conversion hook.

    Args:
        speech_path (str): Path to the input speech audio (unused by the stub).
        emotion (str): Emotion label to colour the singing with.
        emotion_intensity (float): Strength of the emotion, 0..1.
        output_path (str): Where the singing audio would be written.

    Returns:
        str: *output_path*, so callers can chain on the file location.
    """
    # TODO: run the DiffSinger (or equivalent) model here; write to output_path.
    print(f"Singing audio saved to {output_path}")
    return output_path
49
+
50
+ # Function to generate musical accompaniment
51
def generate_accompaniment(
    lyrics,
    melody_path,
    output_path="output_accompaniment.mid",
    tempo_value=120,
    key="C",
    time_signature="4/4",
    style="pop"
):
    """Write a (placeholder) accompaniment MIDI file and return its path.

    Args:
        lyrics (str): Song lyrics (not yet used by the stub).
        melody_path (str): Source melody audio path (not yet used by the stub).
        output_path (str): Destination MIDI file.
        tempo_value (int): Tempo in BPM for the MetronomeMark.
        key (str): Key signature name understood by music21 (e.g. "C").
        time_signature (str): Time signature string (e.g. "4/4").
        style (str): Accompaniment style selector ("pop", "classical", ...).

    Returns:
        str: *output_path* of the written MIDI file.
    """
    score = music21.stream.Score()
    part = music21.stream.Part()

    # Tempo, key and time signature go first so they govern the whole part.
    part.append(music21.tempo.MetronomeMark(number=tempo_value))
    part.append(music21.key.Key(key))
    part.append(music21.meter.TimeSignature(time_signature))

    # Placeholder melody: one octave of the C major scale in quarter notes.
    # TODO: derive real notes from melody_path instead.
    for pitch_name in ('C4', 'D4', 'E4', 'F4', 'G4', 'A4', 'B4', 'C5'):
        scale_note = music21.note.Note(pitch_name)
        scale_note.quarterLength = 1.0
        part.append(scale_note)

    score.append(part)

    # Style-specific accompaniment is not implemented yet.
    if style == "pop":
        pass  # TODO: pop-style accompaniment
    elif style == "classical":
        pass  # TODO: classical-style accompaniment
    # Add more styles as needed.

    score.write('midi', fp=output_path)

    return output_path
105
+
106
+ # Function to combine singing and accompaniment
107
def combine_audio(singing_path, accompaniment_path, output_path="final_output.wav"):
    """Mix the singing track with the accompaniment and write the result.

    Args:
        singing_path (str): Path to the vocal audio file.
        accompaniment_path (str): Path to the accompaniment (audio, or a
            ``.mid`` file for which rendering is not yet implemented).
        output_path (str): Destination WAV file.

    Returns:
        str: *output_path* of the written mix.
    """
    singing, sr = librosa.load(singing_path, sr=None)

    if accompaniment_path.endswith('.mid'):
        # MIDI rendering is not implemented yet; stand in with silence.
        accompaniment = np.zeros_like(singing)
    else:
        # Resample the accompaniment to the singing track's rate.
        accompaniment, _ = librosa.load(accompaniment_path, sr=sr)

    # Align lengths: truncate a longer accompaniment, zero-pad a shorter one.
    gap = len(singing) - len(accompaniment)
    if gap < 0:
        accompaniment = accompaniment[:len(singing)]
    else:
        accompaniment = np.pad(accompaniment, (0, gap))

    # Weighted mix with the voice dominant; tweak these to taste.
    singing_volume = 0.7
    accompaniment_volume = 0.3
    mixed = singing_volume * singing + accompaniment_volume * accompaniment

    # Normalize only when the mix would clip.
    peak = np.max(np.abs(mixed))
    if peak > 1.0:
        mixed = mixed / peak

    sf.write(output_path, mixed, sr)

    return output_path
142
+
143
+ # Main function to process text to singing
144
def text_to_singing(text, output_path="final_output.wav"):
    """End-to-end pipeline: text -> emotion -> speech -> singing -> mixed track.

    Args:
        text (str): Lyrics / input text to sing.
        output_path (str): Destination for the final mixed audio.

    Returns:
        tuple[str, str, float]: (final audio path, detected emotion label,
        emotion intensity in [0, 1]).
    """
    # 1. Emotion analysis drives the singing style.
    detector = EmotionDetector()
    emotion, emotion_intensity = detector.detect_emotion(text)
    print(f"Detected emotion: {emotion}")
    print(f"Sentiment score: {emotion_intensity}")

    # 2. Text -> speech.
    speech_path = synthesize_speech(text)

    # 3. Speech -> singing, coloured by the detected emotion.
    singing_path = convert_to_singing(speech_path, emotion, emotion_intensity)

    # 4. Generate accompaniment from the lyrics and the singing track.
    accompaniment_path = generate_accompaniment(text, singing_path)

    # 5. Mix vocals and accompaniment into the final output.
    final_output = combine_audio(singing_path, accompaniment_path, output_path)

    return final_output, emotion, emotion_intensity