Vaishnavi0404 committed on
Commit
86e14e9
·
verified ·
1 Parent(s): c8e3569

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +155 -11
app.py CHANGED
@@ -1,15 +1,159 @@
 
1
  import gradio as gr
2
- from diff_singer_infer import run_diffsinger_inference
 
 
 
 
 
 
 
 
 
 
 
3
 
4
- def convert_to_singing_style(input_audio):
5
- return run_diffsinger_inference(input_audio)
 
 
6
 
7
- demo = gr.Interface(
8
- fn=convert_to_singing_style,
9
- inputs=gr.Audio(type="filepath", label="Input TTS + Music Audio"),
10
- outputs=gr.Audio(type="filepath", label="Singing Style Output"),
11
- title="🎤 Text2Sing - DiffSinger Inference",
12
- description="Upload merged TTS + Music audio and convert it to expressive singing voice using pitch/vibrato modification."
13
- )
14
 
15
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import tempfile

import gradio as gr
import librosa
import nltk
import numpy as np
import soundfile as sf
import text2emotion as te
import torch
from pydub import AudioSegment
from transformers import pipeline

from music_generator import generate_accompaniment
from singing_converter import SingingConverter
from text_processor import TextProcessor
from voice_synthesizer import VoiceSynthesizer
15
 
16
# Download necessary NLTK data.
# These are cached locally after the first run, so repeated startups are cheap.
nltk.download('omw-1.4')
nltk.download('vader_lexicon')
nltk.download('punkt')

# Initialize components (shared, module-level singletons used by the handler below)
text_processor = TextProcessor()        # text -> phonemes / durations / stress markers
voice_synthesizer = VoiceSynthesizer()  # TTS: renders text to a speech WAV
singing_converter = SingingConverter()  # transforms speech audio into singing

# Setup sentiment analysis.
# NOTE(review): no model checkpoint is pinned here — transformers will pick its
# default sentiment model, which may change between library versions; pin a
# model name for reproducibility.
sentiment_analyzer = pipeline("sentiment-analysis")
28
+
29
def process_text_to_singing(text, voice_type="neutral", tempo=100, pitch_shift=0):
    """
    Convert text to a singing voice mixed with mood-matched accompaniment.

    Args:
        text (str): Input text (lyrics) to be converted to singing.
        voice_type (str): Type of voice ("neutral", "feminine", "masculine").
        tempo (int): Speed of the singing (60-180 BPM).
        pitch_shift (int): Pitch adjustment (-12 to 12 semitones).

    Returns:
        tuple: (speech_audio_path, final_output_path) — the raw TTS render and
        the final mixed song, both WAV files inside a per-call temp directory.
    """
    # Per-call scratch directory so concurrent Gradio requests cannot clobber
    # each other's files (the original wrote fixed names like
    # "temp_speech.wav" into the current working directory).
    work_dir = tempfile.mkdtemp(prefix="text2sing_")

    # Step 1: Analyze text for emotion/mood
    emotions = te.get_emotion(text)
    dominant_emotion = max(emotions.items(), key=lambda kv: kv[1])[0]

    # Additional sentiment analysis; signed score: positive > 0, negative < 0
    sentiment_result = sentiment_analyzer(text)[0]
    sentiment_score = sentiment_result['score'] * (1 if sentiment_result['label'] == 'POSITIVE' else -1)

    print(f"Detected emotion: {dominant_emotion}")
    print(f"Sentiment score: {sentiment_score}")

    # Step 2: Process text for pronunciation and timing
    phonemes, durations, stress_markers = text_processor.process(text)

    # Step 3: Generate speech audio first
    speech_audio_path = os.path.join(work_dir, "speech.wav")
    voice_synthesizer.synthesize(
        text=text,
        output_path=speech_audio_path,
        voice_type=voice_type
    )

    # Step 4: Convert speech to singing
    singing_audio_path = os.path.join(work_dir, "singing.wav")
    singing_converter.convert(
        speech_path=speech_audio_path,
        output_path=singing_audio_path,
        emotion=dominant_emotion,
        phonemes=phonemes,
        durations=durations,
        stress_markers=stress_markers,
        pitch_shift=pitch_shift,
        tempo=tempo
    )

    # Step 5: Generate musical accompaniment based on mood
    accompaniment_path = os.path.join(work_dir, "accompaniment.wav")
    generate_accompaniment(
        emotion=dominant_emotion,
        sentiment_score=sentiment_score,
        tempo=tempo,
        output_path=accompaniment_path
    )

    # Step 6: Mix singing voice with accompaniment
    final_output_path = os.path.join(work_dir, "output_song.wav")

    singing = AudioSegment.from_file(singing_audio_path)
    accompaniment = AudioSegment.from_file(accompaniment_path)

    # Adjust volumes (dB) so the voice sits in front of the backing track
    singing = singing - 3           # reduce singing volume slightly
    accompaniment = accompaniment - 10  # reduce accompaniment volume more

    # Loop the accompaniment up to the singing length, then trim.
    # Guard against an empty accompaniment, which previously raised
    # ZeroDivisionError in the repeat-count computation.
    if len(accompaniment) > 0:
        if len(accompaniment) < len(singing):
            # Integer ceiling of singing/accompaniment — enough full repeats
            times_to_repeat = len(singing) // len(accompaniment) + 1
            accompaniment = accompaniment * times_to_repeat
        accompaniment = accompaniment[:len(singing)]
        mixed = singing.overlay(accompaniment)
    else:
        # Nothing to mix in — export the vocal track alone
        mixed = singing

    mixed.export(final_output_path, format="wav")

    return speech_audio_path, final_output_path
110
+
111
# Create Gradio interface.
# Layout: a two-column Blocks page — controls on the left, the raw TTS render
# and the final mixed song on the right.
with gr.Blocks(title="Text2Sing-DiffSinger") as demo:
    gr.Markdown("# Text2Sing-DiffSinger")
    gr.Markdown("Convert text into singing voice with musical accompaniment based on emotional content")

    with gr.Row():
        # Left column: lyrics input plus synthesis controls
        with gr.Column():
            text_input = gr.Textbox(
                label="Enter text to convert to singing",
                placeholder="Type your lyrics here...",
                lines=5
            )

            with gr.Row():
                # Voice preset forwarded to VoiceSynthesizer
                voice_type = gr.Dropdown(
                    label="Voice Type",
                    choices=["neutral", "feminine", "masculine"],
                    value="neutral"
                )
                # Tempo in BPM; range mirrors the handler's documented 60-180
                tempo = gr.Slider(
                    label="Tempo (BPM)",
                    minimum=60,
                    maximum=180,
                    value=100,
                    step=5
                )
                # Pitch shift in semitones (-12..12 = one octave either way)
                pitch_shift = gr.Slider(
                    label="Pitch Adjustment",
                    minimum=-12,
                    maximum=12,
                    value=0,
                    step=1
                )

            convert_btn = gr.Button("Convert to Singing")

        # Right column: playback of intermediate speech and final song
        with gr.Column():
            input_audio = gr.Audio(label="Original Speech")
            output_audio = gr.Audio(label="Singing Output")

    # Wire the button to the pipeline: the handler returns
    # (speech_path, song_path), matched positionally to the two Audio outputs.
    convert_btn.click(
        fn=process_text_to_singing,
        inputs=[text_input, voice_type, tempo, pitch_shift],
        outputs=[input_audio, output_audio]
    )
156
+
157
# Launch the app only when executed as a script (not when imported).
if __name__ == "__main__":
    demo.launch()