| import os |
| import numpy as np |
| import torch |
| import librosa |
| import soundfile as sf |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification |
| import music21 |
|
|
| |
| |
|
|
class EmotionDetector:
    """Coarse text-emotion classifier (Happy / Sad / Neutral).

    Wraps the ``nlptown/bert-base-multilingual-uncased-sentiment`` model,
    which predicts a 1-5 star sentiment distribution, and maps the expected
    star rating onto an emotion label plus an intensity in [0, 1].
    """

    def __init__(self):
        self.tokenizer = AutoTokenizer.from_pretrained(
            "nlptown/bert-base-multilingual-uncased-sentiment")
        self.model = AutoModelForSequenceClassification.from_pretrained(
            "nlptown/bert-base-multilingual-uncased-sentiment")
        # Inference only: disable dropout etc.
        self.model.eval()

    def detect_emotion(self, text):
        """Return ``(emotion_label, intensity)`` for *text*.

        emotion_label is one of "Happy", "Sad", "Neutral"; intensity is a
        float in [0, 1] (0.0 for "Neutral").
        """
        inputs = self.tokenizer(text, return_tensors="pt", truncation=True,
                                padding=True, max_length=512)
        # No gradients needed for inference.
        with torch.no_grad():
            outputs = self.model(**inputs)
        star_probs = outputs.logits[0].softmax(dim=0)
        # Expected star rating, in [1, 5].
        expected_stars = (star_probs * torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0])).sum().item()
        # BUG FIX: the original divided by 5, yielding a [0.2, 1.0] range,
        # which made the "Sad" branch (< 0.3) nearly unreachable. Normalize
        # the 1-5 expectation onto [0, 1] instead.
        sentiment_value = (expected_stars - 1.0) / 4.0

        if sentiment_value > 0.7:
            return "Happy", sentiment_value * 2 - 1
        elif sentiment_value < 0.3:
            return "Sad", 1 - sentiment_value * 2
        else:
            return "Neutral", 0.0
|
|
| |
def synthesize_speech(text, output_path="temp_speech.wav"):
    """Placeholder text-to-speech step.

    BUG FIX: the original printed a success message and returned a path but
    never created the file, so downstream loaders crashed on a missing file.
    Until a real TTS backend is wired in, write a silent mono 16-bit WAV
    whose duration scales with the word count of *text*.

    Returns the path the audio was written to.
    """
    import wave  # stdlib; local import keeps the placeholder self-contained

    sample_rate = 22050
    # Rough pacing placeholder: ~0.1 s per word, at least 1 s total.
    duration_s = max(1.0, 0.1 * len(text.split()))
    silence = np.zeros(int(sample_rate * duration_s), dtype=np.int16)
    with wave.open(output_path, "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)  # 16-bit samples
        wav_file.setframerate(sample_rate)
        wav_file.writeframes(silence.tobytes())

    print(f"Speech synthesized and saved to {output_path}")
    return output_path
|
|
| |
def convert_to_singing(speech_path, emotion="Neutral", emotion_intensity=0.5, output_path="temp_singing.wav"):
    """Placeholder speech-to-singing conversion.

    BUG FIX: the original printed a success message and returned a path but
    never wrote any file, so ``combine_audio`` crashed loading it. Until a
    real conversion model is integrated, copy the speech audio through
    unchanged. *emotion* and *emotion_intensity* are accepted for interface
    stability but are not yet used (TODO: apply pitch/tempo shaping).

    Returns the path the audio was written to.
    """
    import shutil  # stdlib; local import keeps the placeholder self-contained

    shutil.copyfile(speech_path, output_path)
    print(f"Singing audio saved to {output_path}")
    return output_path
|
|
| |
def generate_accompaniment(
    lyrics,
    melody_path,
    output_path="output_accompaniment.mid",
    tempo_value=120,
    key="C",
    time_signature="4/4",
    style="pop",
    notes=None,
):
    """Write a simple single-part MIDI melody and return its path.

    Parameters
    ----------
    lyrics : str
        Currently unused placeholder — TODO: derive phrasing from lyrics.
    melody_path : str
        Currently unused placeholder — TODO: extract melody from this audio.
    output_path : str
        Destination for the rendered MIDI file.
    tempo_value : int
        Metronome mark in BPM.
    key, time_signature : str
        Key signature (e.g. "C") and meter (e.g. "4/4") for the part.
    style : str
        Accompaniment style; style-specific arrangement is not yet
        implemented (both branches are placeholders).
    notes : list[str] | None
        Pitch names to render as quarter notes. Defaults to an ascending
        C-major scale (generalized from the previously hard-coded scale).
    """
    if notes is None:
        notes = ['C4', 'D4', 'E4', 'F4', 'G4', 'A4', 'B4', 'C5']

    score = music21.stream.Score()
    melody_part = music21.stream.Part()

    # Global musical context for the part.
    melody_part.append(music21.tempo.MetronomeMark(number=tempo_value))
    melody_part.append(music21.key.Key(key))
    melody_part.append(music21.meter.TimeSignature(time_signature))

    # Render each pitch as a quarter note.
    for pitch_name in notes:
        note_obj = music21.note.Note(pitch_name)
        note_obj.quarterLength = 1.0
        melody_part.append(note_obj)

    score.append(melody_part)

    # Style-specific harmonization is not implemented yet; kept as explicit
    # placeholders so the intended extension points remain visible.
    if style == "pop":
        pass
    elif style == "classical":
        pass

    score.write('midi', fp=output_path)
    return output_path
|
|
| |
def combine_audio(singing_path, accompaniment_path, output_path="final_output.wav",
                  singing_volume=0.7, accompaniment_volume=0.3):
    """Mix singing and accompaniment audio into one WAV file.

    Parameters
    ----------
    singing_path : str
        Audio file with the vocal track; its sample rate drives the mix.
    accompaniment_path : str
        Audio file to mix underneath. A ``.mid`` path cannot be decoded as
        audio, so it is replaced with silence (TODO: render MIDI to audio).
    output_path : str
        Destination WAV path.
    singing_volume, accompaniment_volume : float
        Linear gains for each track (generalized from the previously
        hard-coded 0.7 / 0.3 mix; defaults preserve the old behavior).

    Returns the output path.
    """
    singing, sr = librosa.load(singing_path, sr=None)

    if accompaniment_path.endswith('.mid'):
        # MIDI is symbolic, not audio — substitute silence so the mix still
        # renders instead of crashing the decoder.
        accompaniment = np.zeros_like(singing)
    else:
        # Resample the accompaniment to the singing sample rate on load.
        accompaniment, _ = librosa.load(accompaniment_path, sr=sr)

    # Length-match the accompaniment: truncate or zero-pad to the vocal.
    if len(accompaniment) > len(singing):
        accompaniment = accompaniment[:len(singing)]
    else:
        accompaniment = np.pad(accompaniment, (0, len(singing) - len(accompaniment)))

    mixed = singing_volume * singing + accompaniment_volume * accompaniment

    # Peak-normalize only when the mix would clip.
    peak = np.max(np.abs(mixed))
    if peak > 1.0:
        mixed = mixed / peak

    sf.write(output_path, mixed, sr)
    return output_path
|
|
| |
def text_to_singing(text, output_path="final_output.wav"):
    """Run the full pipeline: text -> emotion -> speech -> singing -> mix.

    Returns ``(final_audio_path, emotion_label, emotion_intensity)``.
    """
    # Stage 1: classify the text's emotion to steer later stages.
    detector = EmotionDetector()
    emotion, intensity = detector.detect_emotion(text)
    print(f"Detected emotion: {emotion}")
    print(f"Sentiment score: {intensity}")

    # Stage 2: plain speech, then emotion-shaped singing.
    speech_file = synthesize_speech(text)
    singing_file = convert_to_singing(speech_file, emotion, intensity)

    # Stage 3: instrumental backing, then the final mixdown.
    accompaniment_file = generate_accompaniment(text, singing_file)
    mixed_file = combine_audio(singing_file, accompaniment_file, output_path)

    return mixed_file, emotion, intensity