Lambeckkk committed on
Commit
e8008da
·
verified ·
1 Parent(s): 4091abf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -119
app.py CHANGED
@@ -1,128 +1,49 @@
1
import os
import uuid
import numpy as np
import torch
import gradio as gr
from scipy.io import wavfile

# Directory where per-user speaker samples are stored for Bark voice cloning.
os.makedirs("bark_voices", exist_ok=True)

# Fail fast at import time with actionable messages if either heavy
# dependency is missing, instead of crashing later inside a request.
try:
    from TTS.api import TTS
except ImportError:
    raise RuntimeError("Coqui TTS not installed. Add 'TTS' to requirements.")

try:
    from audiocraft.models.musicgen import MusicGen
except ImportError:
    raise RuntimeError("MusicGen not installed. Add audiocraft from GitHub to requirements.")

# Models are loaded lazily on first use (see load_tts_model / load_music_model)
# so app startup stays fast and memory is only paid for features actually used.
tts_model = None
music_model = None
USE_GPU = torch.cuda.is_available()
26
-
27
def load_tts_model():
    """Return the shared Bark TTS model, instantiating it on first use."""
    global tts_model
    if tts_model is not None:
        return tts_model
    tts_model = TTS("tts_models/multilingual/multi-dataset/bark", gpu=USE_GPU)
    return tts_model
32
-
33
def load_music_model():
    """Return the shared MusicGen model, loading it lazily on first call.

    The generation length is fixed at 15 seconds when the model is created.
    """
    global music_model
    if music_model is not None:
        return music_model
    target_device = "cuda" if USE_GPU else "cpu"
    music_model = MusicGen.get_pretrained(model_name="facebook/musicgen-small", device=target_device)
    music_model.set_generation_params(duration=15)
    return music_model
40
-
41
def generate_voice(text, voice_sample):
    """Synthesize `text` to voice_output.wav, optionally cloning a voice.

    Args:
        text: lyrics/speech to synthesize; must be non-empty.
        voice_sample: optional (sample_rate, np.ndarray) tuple from a
            gr.Audio(type="numpy") input, used as a Bark cloning reference.

    Returns:
        Path to the generated WAV file.

    Raises:
        gr.Error: on empty text or any synthesis failure.
    """
    if not text.strip():
        raise gr.Error("Please enter lyrics or speech text.")

    tts = load_tts_model()
    output_path = "voice_output.wav"
    speaker_name = None

    try:
        if voice_sample:
            orig_sr, audio_data = voice_sample
            # Downmix stereo to mono by averaging channels.
            if audio_data.ndim > 1:
                audio_data = audio_data.mean(axis=1)
            audio_data = audio_data.astype(np.float32)
            # Resample to 24 kHz via linear interpolation.
            # NOTE(review): assumes Bark expects 24 kHz reference audio — confirm;
            # F.interpolate is a crude resampler (no anti-aliasing filter).
            if orig_sr != 24000:
                import torch.nn.functional as F
                audio_tensor = torch.tensor(audio_data).unsqueeze(0)
                resampled_len = int(audio_tensor.shape[1] * 24000 / orig_sr)
                resampled = F.interpolate(audio_tensor.unsqueeze(1), size=resampled_len, mode="linear", align_corners=False)
                audio_data = resampled.squeeze().numpy()
                orig_sr = 24000
            # Peak-normalize, then convert to int16 PCM for wavfile.write.
            max_val = np.max(np.abs(audio_data))
            if max_val > 0:
                audio_data /= max_val
            audio_data = (audio_data * 32767).astype(np.int16)

            # Each upload gets a unique speaker directory; Coqui's Bark wrapper
            # looks up `speaker` under `voice_dir` by folder name.
            # NOTE(review): these directories are never cleaned up — they
            # accumulate one per request with a sample.
            speaker_id = f"user_{uuid.uuid4().hex[:8]}"
            speaker_dir = os.path.join("bark_voices", speaker_id)
            os.makedirs(speaker_dir, exist_ok=True)
            sample_path = os.path.join(speaker_dir, "speaker.wav")
            wavfile.write(sample_path, orig_sr, audio_data)
            speaker_name = speaker_id

        # Inference only — no gradients needed.
        with torch.no_grad():
            if speaker_name:
                tts.tts_to_file(text=text, file_path=output_path, speaker=speaker_name, voice_dir="bark_voices/")
            else:
                tts.tts_to_file(text=text, file_path=output_path)

        return output_path

    except Exception as e:
        # Log the real cause server-side; surface a generic message to the UI.
        print(f"Voice generation error: {e}")
        raise gr.Error("Voice generation failed. Try again.")
 
85
 
 
86
def generate_music(prompt):
    """Generate ~15 s of music from a text prompt and save it to music_output.wav.

    Args:
        prompt: text description of the desired music; must be non-empty.

    Returns:
        Path to the generated WAV file.

    Raises:
        gr.Error: on empty prompt or any generation failure.
    """
    if not prompt.strip():
        raise gr.Error("Please enter a music description.")

    model = load_music_model()
    output_path = "music_output.wav"

    try:
        # Inference only — no gradients needed.
        with torch.no_grad():
            wavs = model.generate([prompt])
            sr = model.sample_rate
            audio_tensor = wavs[0].cpu().numpy()
            # MusicGen returns (channels, samples); wavfile.write wants
            # (samples, channels), so transpose multi-channel output.
            if audio_tensor.ndim == 2:
                audio_np = audio_tensor.T
            else:
                audio_np = audio_tensor
            # Fix: clip to [-1, 1] before the int16 cast. MusicGen output can
            # slightly exceed that range, and out-of-range values wrap around
            # when cast to int16, producing loud clicks.
            audio_np = np.clip(audio_np, -1.0, 1.0)
            wavfile.write(output_path, sr, (audio_np * 32767).astype(np.int16))
            return output_path

    except Exception as e:
        # Log the real cause server-side; surface a generic message to the UI.
        print(f"Music generation error: {e}")
        raise gr.Error("Music generation failed. Try a different prompt.")
108
-
109
# Two-tab UI: lyrics → cloned/standard voice, and text prompt → music.
with gr.Blocks(css=".gradio-container {background-color: #121212; color: white;}") as app:
    gr.Markdown("# LarynxLab – AI Music & Voice Generator")

    with gr.Tabs():
        with gr.Tab("Lyrics → Voice"):
            gr.Markdown("Upload an optional voice sample (max 20 sec) and enter lyrics.")
            sample_in = gr.Audio(label="Voice Sample (optional)", type="numpy")
            lyrics_in = gr.Textbox(label="Lyrics / Speech", lines=3)
            synth_btn = gr.Button("Generate Voice")
            speech_out = gr.Audio(label="Output Voice", type="filepath")
            synth_btn.click(generate_voice, inputs=[lyrics_in, sample_in], outputs=speech_out)

        with gr.Tab("Text → Music"):
            gr.Markdown("Describe the music: genre, vibe, instruments, etc.")
            prompt_in = gr.Textbox(label="Music Prompt", lines=3)
            compose_btn = gr.Button("Generate Music")
            track_out = gr.Audio(label="Music Output", type="filepath")
            compose_btn.click(generate_music, inputs=prompt_in, outputs=track_out)

# Serialize requests: one generation at a time keeps GPU/CPU memory bounded.
# NOTE(review): queue(concurrency_count=...) is Gradio 3.x API; Gradio 4
# removed that argument — confirm the pinned gradio version.
app.queue(concurrency_count=1).launch()
 
 
 
 
 
1
  import gradio as gr
2
+ from transformers import pipeline, set_seed
3
+ from audiocraft.models import MusicGen
4
+ from TTS.api import TTS
5
+ import torch
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
# Load all three models eagerly at import time: GPT-2 for lyric generation,
# MusicGen-small for music, Bark (via Coqui TTS) for speech synthesis.
# Fix the transformers RNG so lyric generation is reproducible across runs.
set_seed(42)
lyrics_generator = pipeline("text-generation", model="gpt2")
# NOTE(review): no device argument here — MusicGen loads on its library
# default, while the TTS model below is explicitly placed on GPU when
# available; confirm this asymmetry is intended.
music_model = MusicGen.get_pretrained('facebook/musicgen-small')
tts_model = TTS(model_name="tts_models/multilingual/multi-dataset/bark", progress_bar=False, gpu=torch.cuda.is_available())
12
 
13
def generate_lyrics(prompt):
    """Continue `prompt` into lyrics with GPT-2 (up to 100 tokens total)."""
    completions = lyrics_generator(prompt, max_length=100, num_return_sequences=1)
    best = completions[0]
    return best['generated_text']
17
 
18
# Music generation
def generate_music(prompt):
    """Generate a 10-second music clip from a text description.

    Args:
        prompt: text description of the desired music.

    Returns:
        A (sample_rate, samples) tuple for a gr.Audio(type="numpy") output.
    """
    music_model.set_generation_params(duration=10)
    # Inference only — no gradients needed.
    with torch.no_grad():
        output = music_model.generate([prompt])
    wav = output[0].cpu().numpy()
    # MusicGen returns (channels, samples); Gradio's numpy audio expects
    # (samples,) or (samples, channels).
    if wav.ndim == 2:
        wav = wav[0] if wav.shape[0] == 1 else wav.T
    # Fix: the sample rate was hard-coded to 16000, but musicgen-small
    # generates at model.sample_rate (32 kHz), so playback was slowed down
    # and pitched low. Report the model's actual rate.
    return (music_model.sample_rate, wav)
23
+
24
# Voice generation
def generate_voice(text):
    """Render `text` as speech with Bark and return the output file path."""
    out_file = "bark_output.wav"
    tts_model.tts_to_file(text=text, file_path=out_file)
    return out_file
29
+
30
# Unified UI: left column chains idea → lyrics → vocals; right column
# generates music from its own prompt.
with gr.Blocks(theme=gr.themes.Base(), css="body {background-color: #121212; color: white;}") as demo:
    with gr.Row():
        with gr.Column():
            idea_box = gr.Textbox(label="Describe your idea", placeholder="A sad lo-fi song about lost love...")
            lyrics_btn = gr.Button("Generate Lyrics")
            lyrics_box = gr.Textbox(label="Generated Lyrics")
            vocal_btn = gr.Button("Generate Voice")
            vocal_out = gr.Audio(label="Vocal Output", type="filepath")

        with gr.Column():
            beat_box = gr.Textbox(label="Music Prompt", placeholder="lo-fi sad beat with piano")
            beat_btn = gr.Button("Generate Music")
            beat_out = gr.Audio(label="Music Output", type="numpy")

    # Wiring: generated lyrics feed directly into voice synthesis.
    lyrics_btn.click(generate_lyrics, inputs=idea_box, outputs=lyrics_box)
    vocal_btn.click(generate_voice, inputs=lyrics_box, outputs=vocal_out)
    beat_btn.click(generate_music, inputs=beat_box, outputs=beat_out)

demo.launch()