kepsmiling121 committed on
Commit
316e456
·
verified ·
1 Parent(s): 05eba2c

Create interfaces/text_to_music.py

Browse files
Files changed (1) hide show
  1. interfaces/text_to_music.py +210 -0
interfaces/text_to_music.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Text-to-Music generation interface
3
+ """
4
+ import gradio as gr
5
+ import numpy as np
6
+ from typing import Optional
7
+ import logging
8
+
9
+ from models.model_manager import ModelManager
10
+ from utils.ui_components import UIComponents
11
+ from utils.audio_processor import AudioProcessor
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
class TextToMusicInterface:
    """Gradio UI for generating music from free-text prompts.

    Builds the layout (prompt box, model/preset selectors, duration and
    guidance sliders, audio player, waveform/spectrogram plots) and wires
    the event handlers that drive generation through a ModelManager.
    """

    def __init__(self, model_manager: ModelManager):
        # Resolves model names (e.g. from the dropdown) to loaded models.
        self.model_manager = model_manager
        # Shared helper for normalization, fades, temp files and saving;
        # reused by every generation instead of re-instantiating per call.
        self.audio_processor = AudioProcessor()

    def create_interface(self) -> gr.Textbox:
        """Create the text-to-music interface.

        Returns:
            The prompt ``gr.Textbox`` so callers can reference the main
            input component. (The previous ``-> gr.Interface`` annotation
            was incorrect — the method has always returned the Textbox.)
        """
        with gr.Group():
            gr.Markdown("## 🎼 Text-to-Music Generation")
            gr.Markdown("Generate music from text descriptions")

            with gr.Row():
                with gr.Column(scale=2):
                    text_input = gr.Textbox(
                        label="Music Description",
                        placeholder="Describe the music you want to generate...\nExamples:\n- Upbeat jazz with piano and drums\n- Electronic dance music with synthesizers\n- Calm acoustic guitar melody",
                        lines=4,
                        max_lines=6
                    )

                    with gr.Row():
                        model_dropdown = UIComponents.create_model_dropdown()
                        preset_dropdown = UIComponents.create_preset_dropdown()

                    with gr.Row():
                        duration_slider = UIComponents.create_duration_slider()
                        guidance_slider = UIComponents.create_guidance_slider()

                    with gr.Row():
                        generate_btn = gr.Button("🎵 Generate Music", variant="primary", scale=2)
                        cancel_btn = gr.Button("⏹️ Cancel", variant="secondary")

                with gr.Column(scale=3):
                    audio_output = UIComponents.create_audio_player("Generated Music")

                    with gr.Row():
                        download_btn = gr.DownloadButton("💾 Download", variant="secondary")
                        share_btn = gr.Button("🔗 Share", variant="secondary")
                        regenerate_btn = gr.Button("🔄 Regenerate", variant="secondary")

            # Advanced options (collapsed by default)
            with gr.Accordion("Advanced Options", open=False):
                with gr.Row():
                    temperature_slider = gr.Slider(0.1, 2.0, 1.0, label="Temperature")
                    top_k_slider = gr.Slider(1, 100, 50, label="Top K")
                with gr.Row():
                    seed_input = gr.Number(label="Seed (0 for random)", value=0, precision=0)
                    # NOTE(review): batch_size is displayed but never wired
                    # into any handler — confirm whether batching is planned.
                    batch_size = gr.Slider(1, 5, 1, step=1, label="Batch Size")

            # Visualization tabs
            with gr.Tabs():
                with gr.Tab("Waveform"):
                    waveform_plot = gr.Plot(label="Waveform Visualization")

                with gr.Tab("Spectrogram"):
                    spectrogram_plot = gr.Plot(label="Spectrogram")

                with gr.Tab("Audio Info"):
                    info_text = gr.Textbox(label="Audio Information", lines=5, interactive=False)

            # Examples
            gr.Examples(
                examples=[
                    ["Upbeat jazz with piano and drums, 120 BPM"],
                    ["Electronic dance music with synthesizers, 128 BPM"],
                    ["Calm acoustic guitar melody in the style of Bob Dylan"],
                    ["Orchestral cinematic music, dramatic and emotional"],
                    ["Lo-fi hip hop beat, relaxed and chill"],
                    ["Classical piano piece in the style of Chopin"],
                    ["Reggae with off-beat guitar and bass, 90 BPM"],
                    ["Heavy metal with distorted guitars and fast drums"]
                ],
                inputs=text_input,
                label="Example Prompts"
            )

            # Event handlers
            generate_btn.click(
                fn=self.generate_music,
                inputs=[
                    text_input, model_dropdown, duration_slider, guidance_slider,
                    temperature_slider, top_k_slider, seed_input
                ],
                outputs=[audio_output, waveform_plot, spectrogram_plot, info_text]
            )

            preset_dropdown.change(
                fn=self.apply_preset,
                inputs=preset_dropdown,
                outputs=[text_input, duration_slider, model_dropdown]
            )

            # Regenerate deliberately omits seed_input: regenerate_music
            # appends a random seed (0) itself.
            regenerate_btn.click(
                fn=self.regenerate_music,
                inputs=[
                    text_input, model_dropdown, duration_slider, guidance_slider,
                    temperature_slider, top_k_slider
                ],
                outputs=[audio_output, waveform_plot, spectrogram_plot, info_text]
            )

        return text_input

    def generate_music(
        self,
        prompt: str,
        model_name: str,
        duration: int,
        guidance_scale: float,
        temperature: float,
        top_k: int,
        seed: int
    ):
        """Generate music from text prompt.

        Args:
            prompt: Free-text description of the desired music.
            model_name: Key understood by ``self.model_manager.get_model``.
            duration: Requested length in seconds.
            guidance_scale: Classifier-free guidance strength passed to the model.
            temperature: Sampling temperature passed to the model.
            top_k: Top-k sampling cutoff passed to the model.
            seed: RNG seed; values <= 0 mean "random" (seed is not set).

        Returns:
            ``(output_path, waveform_fig, spectrogram_fig, info)`` matching the
            Gradio outputs wired in ``create_interface``.

        Raises:
            gr.Error: On empty prompt, unavailable model, or any generation
                failure (wrapped, with the original exception chained).
        """
        try:
            if not prompt.strip():
                raise gr.Error("Please enter a music description")

            # Get model
            model = self.model_manager.get_model(model_name)
            if not model:
                raise gr.Error(f"Model {model_name} not available")

            # Set seed if provided (0 or negative means leave RNG random).
            if seed > 0:
                np.random.seed(seed)

            # Generate audio
            logger.info(f"Generating music: {prompt[:50]}...")

            audio_array = model.generate_from_text(
                prompt=prompt,
                duration=duration,
                guidance_scale=guidance_scale,
                temperature=temperature,
                top_k=top_k
            )

            # Process audio: normalize levels, then fade edges to avoid clicks.
            audio_array = self.audio_processor.normalize_audio(audio_array)
            audio_array = self.audio_processor.apply_fade(audio_array)

            # Save to temporary file for the gr.Audio component.
            output_path = self.audio_processor.create_temp_file()
            self.audio_processor.save_audio(audio_array, output_path)

            # Create visualizations
            waveform_fig = UIComponents.create_audio_visualization(audio_array)
            spectrogram_fig = UIComponents.create_spectrogram_visualization(audio_array)

            # Audio info
            info = self._get_audio_info(audio_array, duration)

            logger.info("Music generation completed successfully")

            return output_path, waveform_fig, spectrogram_fig, info

        except gr.Error:
            # User-facing validation errors (empty prompt, missing model)
            # propagate as-is instead of being double-wrapped as
            # "Generation failed: ...".
            raise
        except Exception as e:
            logger.error(f"Generation failed: {str(e)}")
            raise gr.Error(f"Generation failed: {str(e)}") from e

    def apply_preset(self, preset_name: str):
        """Apply preset configuration.

        Returns ``(prompt, duration, model)`` for the named preset, or the
        defaults ``("", 10, "musicgen_small")`` when the name is unknown.
        """
        from config import PRESETS
        if preset_name in PRESETS:
            preset = PRESETS[preset_name]
            return preset["prompt"], preset["duration"], preset["model"]
        return "", 10, "musicgen_small"

    def regenerate_music(self, *args):
        """Regenerate with same parameters but different seed.

        The regenerate click handler passes (prompt, model_name, duration,
        guidance_scale, temperature, top_k) — no seed — so seed=0 (random)
        is appended here. The previous implementation overwrote ``top_k``
        (the last positional arg) and then called ``generate_music`` with
        one argument too few, raising TypeError on every click.
        """
        return self.generate_music(*args, 0)

    def _get_audio_info(self, audio_array: np.ndarray, duration: int) -> str:
        """Get a human-readable summary of the generated audio.

        Reuses ``self.audio_processor`` (created in ``__init__``) rather
        than constructing a throwaway AudioProcessor per call.
        """
        tempo = self.audio_processor.get_tempo(audio_array)
        sample_rate = self.audio_processor.sample_rate

        # File-size estimate assumes 16-bit (2 bytes) samples — matches the
        # "~" hedge shown to the user.
        info = f"""Generated Audio Information:

Duration: {duration} seconds
Sample Rate: {sample_rate} Hz
Channels: Mono
Estimated Tempo: {tempo:.1f} BPM
Total Samples: {len(audio_array)}
File Size: ~{len(audio_array) * 2 / 1024 / 1024:.1f} MB
Peak Amplitude: {np.max(np.abs(audio_array)):.3f}
RMS Level: {np.sqrt(np.mean(audio_array**2)):.3f}"""

        return info