NitinBot001 committed on
Commit
fa2f0d0
·
verified ·
1 Parent(s): 5181adf

Upload 8 files

Browse files
Files changed (8) hide show
  1. .gitignore +7 -0
  2. Dockerfile +25 -0
  3. SDL.yaml +60 -0
  4. app.py +276 -0
  5. main.py +115 -0
  6. public/AkashLogo.svg +16 -0
  7. public/favicon.ico +0 -0
  8. requirements.txt +9 -0
.gitignore ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ .venv
2
+ .aider*
3
+ .env
4
+ __pycache__
5
+ .gradio
6
+ output/*
7
+ !output/.gitkeep
Dockerfile ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM pytorch/pytorch:2.1.0-cuda12.1-cudnn8-runtime

# Address and port the web server is expected to listen on inside the container.
ENV GRADIO_SERVER_PORT=7860
ENV GRADIO_SERVER_NAME="0.0.0.0"

# Create mount point and set permissions for persistent storage
RUN mkdir -p /mnt && \
    chown -R 1000:1000 /mnt && \
    chmod 755 /mnt
VOLUME /mnt

WORKDIR /opt/app

# Copy requirements first for better caching
COPY requirements.txt requirements.txt

# git is required because requirements.txt installs packages from git+https URLs.
# Skipping recommended packages, removing the apt lists and disabling the pip
# cache keep this layer from carrying files that are never used at runtime.
RUN apt-get update && \
    apt-get install -y --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/* && \
    pip install --no-cache-dir -r requirements.txt

# Copy application files
COPY app.py .
COPY public/ /opt/app/public/

# Run the Gradio app
CMD ["python", "app.py"]
SDL.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ version: "2.0"
3
+ services:
4
+ bark-small:
5
+ image: alexpedersen/audio-akash:0.1.3
6
+ expose:
7
+ - port: 7860
8
+ as: 7860
9
+ to:
10
+ - global: true
11
+ # accept:
12
+ # - cars.ingress.europlots.com
13
+ params:
14
+ storage:
15
+ data:
16
+ mount: /mnt/
17
+ readOnly: false
18
+ # Optional but recommended
19
+ env:
20
+ - OUTPUT_DIR=/mnt/output # Use persistent storage for app generated files
21
+ - HF_HOME=/mnt/huggingface # Use persistent storage for model cache
22
+ # - PUBLIC_URL=cars.ingress.europlots.com
23
+ profiles:
24
+ compute:
25
+ bark-small:
26
+ resources:
27
+ cpu:
28
+ units: 6
29
+ memory:
30
+ size: 16Gi
31
+ storage:
32
+ - size: 4GB
33
+ - name: data
34
+ size: 40GB
35
+ attributes:
36
+ persistent: true
37
+ class: beta3
38
+ gpu:
39
+ units: 1
40
+ attributes:
41
+ vendor:
42
+ nvidia:
43
+ # min 8gb GPU, for example:
44
+ # - model: rtx4090
45
+ # - model: rtx4080
46
+ # - model: rtx4070
47
+ # - model: rtx3090
48
+ # - model: rtx3080
49
+ # - model: rtx3070
50
+ placement:
51
+ dcloud:
52
+ pricing:
53
+ bark-small:
54
+ denom: uakt
55
+ amount: 1000
56
+ deployment:
57
+ bark-small:
58
+ dcloud:
59
+ profile: bark-small
60
+ count: 1
app.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import glob
4
+ from datetime import datetime, timedelta
5
+
6
+ import numpy as np
7
+ import torch
8
+ from scipy.io.wavfile import write as write_wav
9
+
10
+ from transformers import AutoProcessor, AutoModelForTextToWaveform, BarkModel
11
+
12
+ import gradio as gr
13
+ from fastapi import FastAPI
14
+ from fastapi.staticfiles import StaticFiles
15
+ import uvicorn
16
+ from apscheduler.schedulers.background import BackgroundScheduler
17
+
18
# Directory that receives the generated WAV files; overridable through the
# OUTPUT_DIR environment variable and created on import if it is missing.
OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "output")
os.makedirs(OUTPUT_DIR, exist_ok=True)

# NOTE(review): these SUNO_* flags look like settings of the suno-ai/bark
# package; confirm they have any effect on the transformers BarkModel below.
os.environ.update({
    "SUNO_OFFLOAD_CPU": "True",
    "SUNO_USE_SMALL_MODELS": "True",
})

device = "cuda" if torch.cuda.is_available() else "cpu"

# Half precision is only requested on CUDA. On the CPU fallback the weights are
# kept in float32, because float16 kernels are not reliably available there.
_model_dtype = torch.float16 if device == "cuda" else torch.float32

processor = AutoProcessor.from_pretrained("suno/bark-small")
model = (
    BarkModel.from_pretrained("suno/bark-small", torch_dtype=_model_dtype)
    .to(device)
    .to_bettertransformer()
)
30
+
31
def create_bark_audio(text, voice_preset, device):
    """Synthesize speech for *text* with the module-level Bark model.

    Args:
        text: Prompt handed to the module-level ``processor``.
        voice_preset: Voice preset identifier forwarded to the processor
            (for example ``"v2/en_speaker_6"``).
        device: Device onto which every processor output that exposes a
            ``.to`` method is moved before generation.

    Returns:
        A ``(audio, sample_rate)`` tuple: the generated output copied to the
        CPU as a squeezed NumPy array, and
        ``model.generation_config.sample_rate``.
    """
    encoded = processor(text, voice_preset=voice_preset)

    model_inputs = {}
    for name, value in encoded.items():
        # Entries without a ``.to`` method are passed through unchanged.
        model_inputs[name] = value.to(device) if hasattr(value, 'to') else value

    generated = model.generate(**model_inputs)
    waveform = generated.cpu().numpy().squeeze()
    return waveform, model.generation_config.sample_rate
36
+
37
def save_audio(audio_array, sample_rate, prefix="audio"):
    """Write *audio_array* to a timestamped WAV file inside ``OUTPUT_DIR``.

    The samples are converted to float32 and clipped to ``[-1, 1]`` before
    being written. The file name carries a microsecond-resolution timestamp so
    that two clips saved within the same second do not overwrite each other.

    Args:
        audio_array: NumPy array of audio samples.
        sample_rate: Sample rate passed to the WAV writer.
        prefix: Leading part of the file name. The hourly cleanup only matches
            files whose name starts with ``audio_``.

    Returns:
        Path of the file that was written.
    """
    samples = np.clip(audio_array.astype(np.float32), -1, 1)
    # %f (microseconds) keeps names unique for requests finishing in the same second.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    filename = os.path.join(OUTPUT_DIR, f"{prefix}_{timestamp}.wav")
    write_wav(filename, sample_rate, samples)
    return filename
43
+
44
def generate_speech(text, voice_preset="v2/en_speaker_6"):
    """Generate speech for *text* and store it as a WAV file.

    Args:
        text: Text to synthesize.
        voice_preset: Voice preset identifier handed to ``create_bark_audio``.

    Returns:
        Path of the WAV file produced by ``save_audio``.
    """
    if torch.cuda.is_available():
        target_device = "cuda"
    else:
        target_device = "cpu"

    waveform, rate = create_bark_audio(text, voice_preset, target_device)
    return save_audio(waveform, rate)
48
+
49
def text_to_speech_with_url(text, voice):
    """Generate speech and return both the local file and a shareable URL.

    Args:
        text: Text to synthesize.
        voice: Display name of the voice; must be a key of ``VOICES``.

    Returns:
        A ``(audio_file, url)`` tuple: the path of the generated WAV file and
        the address under ``/generated`` built from the ``PUBLIC_URL``
        environment variable (default ``http://localhost:7860``).

    Raises:
        KeyError: If *voice* is not a key of ``VOICES``.
    """
    audio_file = generate_speech(text, VOICES[voice])
    filename = os.path.basename(audio_file)
    # Strip trailing slashes so a PUBLIC_URL such as "https://host/" does not
    # produce a "//generated/..." path.
    base_url = os.environ.get("PUBLIC_URL", "http://localhost:7860").rstrip("/")
    return audio_file, f"{base_url}/generated/{filename}"
54
+
55
def cleanup_old_files():
    """Delete ``audio_*.wav`` files in ``OUTPUT_DIR`` older than 24 hours.

    Failures on an individual file are reported with ``print`` and do not stop
    the scan of the remaining files.
    """
    cutoff_time = datetime.now() - timedelta(hours=24)
    for file in glob.glob(os.path.join(OUTPUT_DIR, "audio_*.wav")):
        try:
            # The mtime lookup lives inside the try: a file may disappear
            # between the glob and this call.
            if datetime.fromtimestamp(os.path.getmtime(file)) < cutoff_time:
                os.remove(file)
        except FileNotFoundError:
            # Already gone, which is the outcome this job wants anyway.
            continue
        except OSError as e:
            print(f"Error removing file {file}: {e}")
63
+
64
# Language codes that have speaker presets, in the order they are listed in
# the voice dropdown.
_VOICE_LANGUAGES = (
    "en", "zh", "fr", "de", "hi", "it", "ja",
    "ko", "pl", "pt", "ru", "es", "tr",
)
# Each language offers speakers 0 through 9.
_SPEAKERS_PER_LANGUAGE = 10

# Maps the label shown in the UI (e.g. "Speaker 6 (EN)") to the voice preset
# identifier passed to the processor (e.g. "v2/en_speaker_6").
VOICES = {
    f"Speaker {index} ({lang.upper()})": f"v2/{lang}_speaker_{index}"
    for lang in _VOICE_LANGUAGES
    for index in range(_SPEAKERS_PER_LANGUAGE)
}
196
+
197
# Extra styling for the page.
# - Hides the element with id "component-16".
#   NOTE(review): this looks like an auto-generated Gradio id that may shift
#   between Gradio versions or layout changes; confirm it still targets the
#   intended element.
# - Draws the logo above the page title; the image path is relative and is
#   expected to resolve to the "/public" static mount.
CUSTOM_CSS = """
#component-16 { display: none !important; }
.gradio-container .main h1 { padding-top: 60px; position: relative; }
.gradio-container .main h1::before {
    content: '';
    position: absolute;
    top: 0;
    left: 50%;
    transform: translateX(-50%);
    width: 253px;
    height: 50px;
    background-image: url('public/AkashLogo.svg');
    background-repeat: no-repeat;
    background-position: center;
    background-size: contain;
}
"""

# Markdown shown under the page title. Kept flush-left so no line can be
# interpreted as an indented code block.
_DESCRIPTION = """
Transform text into natural-sounding speech using the Bark AI model.
Features support for multiple languages and voice styles.

**How to use:**
1. Enter your text in any supported language
2. Select a voice preset
3. Click submit to generate speech
4. Get the public URL to share/download the generated audio (it will expire in 24 hours)
"""

# The UI: a text box plus voice dropdown in, an audio player plus the public
# URL of the generated file out.
with gr.Blocks(css=CUSTOM_CSS) as gradio_audio:
    gr.Interface(
        fn=text_to_speech_with_url,
        inputs=[
            gr.Textbox(label="Text to audio", placeholder="Enter text here...", show_copy_button=False),
            gr.Dropdown(choices=list(VOICES.keys()), value="Speaker 0 (EN)", label="Voice"),
        ],
        outputs=[
            gr.Audio(label="Generated Speech"),
            gr.Textbox(label="Public URL", interactive=False, show_copy_button=True),
        ],
        title="Audio Generator",
        description=_DESCRIPTION,
        article="""<div style="text-align: center">Powered by <a href="https://huggingface.co/suno/bark-small">Bark-small</a> model and <a href="https://akash.network">Akash Network</a>, created by <a href="https://github.com/alexx855">alexx855</a></div>""",
        # Each example pairs a sentence with a voice label that is a key of VOICES.
        examples=[
            ["Welcome to the news. Today's top story...", "Speaker 0 (EN)"],
            ["The quick brown fox jumps over the lazy dog.", "Speaker 1 (EN)"],
            ["你好,今天天气真不错。", "Speaker 0 (ZH)"],
            ["Bonjour, comment allez-vous aujourd'hui?", "Speaker 0 (FR)"],
            ["J'aime beaucoup voyager en France.", "Speaker 1 (FR)"],
            ["Guten Tag, wie geht es Ihnen?", "Speaker 0 (DE)"],
            ["Das Wetter ist heute sehr schön.", "Speaker 1 (DE)"],
            ["नमस्ते, आप कैसे हैं?", "Speaker 0 (HI)"],
            # This sentence contained replacement characters (mojibake); restored.
            ["मौसम बहुत सुहावना है।", "Speaker 1 (HI)"],
            ["Buongiorno, come stai oggi?", "Speaker 0 (IT)"],
            ["Mi piace molto viaggiare in Italia.", "Speaker 1 (IT)"],
            ["こんにちは、お元気ですか?", "Speaker 0 (JA)"],
            ["今日はとても良い天気ですね。", "Speaker 1 (JA)"],
            ["안녕하세요, 오늘 기분이 어떠신가요?", "Speaker 0 (KO)"],
            ["날씨가 정말 좋네요.", "Speaker 1 (KO)"],
            ["Dzień dobry, jak się masz?", "Speaker 0 (PL)"],
            ["Dzisiaj jest bardzo ładna pogoda.", "Speaker 1 (PL)"],
            ["Olá, como está você hoje?", "Speaker 0 (PT)"],
            ["O tempo está muito bonito hoje.", "Speaker 1 (PT)"],
            ["Здравствуйте, как ваши дела?", "Speaker 0 (RU)"],
            ["Сегодня прекрасная погода.", "Speaker 1 (RU)"],
            ["Hola, ¿cómo estás hoy?", "Speaker 0 (ES)"],
            ["El tiempo está muy bonito hoy.", "Speaker 1 (ES)"],
            ["Merhaba, bugün nasılsınız?", "Speaker 0 (TR)"],
            ["Bugün hava çok güzel.", "Speaker 1 (TR)"],
        ],
    )
266
+
267
# Run the expired-audio cleanup once an hour on a background scheduler that is
# started as soon as this module is imported.
scheduler = BackgroundScheduler()
scheduler.add_job(cleanup_old_files, 'interval', hours=1)
scheduler.start()

if __name__ == "__main__":
    app = FastAPI()
    # The static mounts are registered before the Gradio app is mounted at "/".
    app.mount("/generated", StaticFiles(directory=OUTPUT_DIR), name="generated")
    app.mount("/public", StaticFiles(directory="public"), name="public")
    app = gr.mount_gradio_app(app, gradio_audio, path="/", favicon_path="public/favicon.ico")

    # Honour the GRADIO_SERVER_NAME / GRADIO_SERVER_PORT variables when they
    # are set, falling back to the previously hard-coded 0.0.0.0:7860.
    server_host = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0")
    server_port = int(os.environ.get("GRADIO_SERVER_PORT", "7860"))
    uvicorn.run(app, host=server_host, port=server_port)
main.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import AutoProcessor, AutoModelForTextToWaveform, BarkModel
3
+ from scipy.io.wavfile import write as write_wav
4
+ import os
5
+ import time
6
+ from datetime import datetime, timedelta
7
+ import numpy as np
8
+ from apscheduler.schedulers.background import BackgroundScheduler
9
+ import glob
10
+
11
# Flags exported to the process environment before any model is loaded.
os.environ.update({
    "SUNO_OFFLOAD_CPU": "True",
    "SUNO_USE_SMALL_MODELS": "True",
})

# Destination for generated WAV files; created on import when missing.
OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "output")
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Hugging Face cache location taken from HF_HOME. Only the raw value is
# recorded here: the "~" in the default is not expanded and the directory is
# not created by this code.
HF_DIR = os.environ.get("HF_HOME", "~/.cache/huggingface")
21
+
22
def log_time(start_time: float, step_name: str) -> float:
    """Print how long a step took and return a fresh timestamp.

    Args:
        start_time: ``time.time()`` value captured when the step began.
        step_name: Label printed in front of the duration.

    Returns:
        The current ``time.time()`` value, usable as the start of the next step.
    """
    duration = time.time() - start_time
    print("{}: {:.2f} seconds".format(step_name, duration))
    return time.time()
26
+
27
start = time.time()

device = "cuda" if torch.cuda.is_available() else "cpu"

# Half precision is only requested on CUDA. On the CPU fallback the weights are
# kept in float32, because float16 kernels are not reliably available there.
_model_dtype = torch.float16 if device == "cuda" else torch.float32

processor = AutoProcessor.from_pretrained("suno/bark-small")
model = BarkModel.from_pretrained("suno/bark-small", torch_dtype=_model_dtype).to(device)
model = model.to_bettertransformer()
if device == "cuda":
    # Offloading idle sub-models to the CPU only makes sense when there is a
    # GPU to offload from, so it is skipped on CPU-only hosts.
    model.enable_cpu_offload()

start = log_time(start, "Model loading")
36
+
37
+ # download and load all models
38
+ # preload_models()
39
+
40
def cleanup_old_files():
    """Remove ``audio_*.wav`` files in ``OUTPUT_DIR`` older than 24 hours.

    Each removal is reported with ``print``. Failures on an individual file
    are reported as well and do not stop the scan of the remaining files.
    """
    cutoff_time = datetime.now() - timedelta(hours=24)
    for file in glob.glob(os.path.join(OUTPUT_DIR, "audio_*.wav")):
        try:
            # The mtime lookup lives inside the try: a file may disappear
            # between the glob and this call.
            file_time = datetime.fromtimestamp(os.path.getmtime(file))
            if file_time < cutoff_time:
                os.remove(file)
                print(f"Removed old file: {file}")
        except FileNotFoundError:
            # Already gone, which is the outcome this job wants anyway.
            continue
        except OSError as e:
            print(f"Error removing file {file}: {e}")
51
+
52
# Background scheduler, started at import time, that runs the cleanup of
# expired audio files once every hour.
scheduler = BackgroundScheduler()
scheduler.add_job(cleanup_old_files, trigger='interval', hours=1)
scheduler.start()
56
+
57
+
58
def create_bark_audio(text, voice_preset, device):
    """Synthesize speech for *text* with the module-level Bark model.

    The time spent on input processing and on generation is printed through
    ``log_time``.

    Args:
        text: Prompt handed to the module-level ``processor``.
        voice_preset: Voice preset identifier forwarded to the processor.
        device: Device onto which every processor output that exposes a
            ``.to`` method is moved before generation.

    Returns:
        A ``(audio, sample_rate)`` tuple: the generated output copied to the
        CPU as a squeezed NumPy array, and
        ``model.generation_config.sample_rate``.

    Raises:
        Exception: Any error raised while processing or generating is printed
            and then re-raised unchanged.
    """
    try:
        started = time.time()
        # The already-loaded processor and model are reused on every call.
        encoded = processor(text, voice_preset=voice_preset)

        model_inputs = {}
        for name, value in encoded.items():
            # Entries without a ``.to`` method are passed through unchanged.
            model_inputs[name] = value.to(device) if hasattr(value, 'to') else value
        log_time(started, "Input processing")

        started = time.time()
        generated = model.generate(**model_inputs)
        waveform = generated.cpu().numpy().squeeze()
        log_time(started, "Audio generation")

        return waveform, model.generation_config.sample_rate

    except Exception as e:
        print(f"Error during audio generation: {e!s}")
        raise
82
+
83
def save_audio(audio_array, sample_rate, prefix="audio"):
    """Write *audio_array* to a timestamped WAV file inside ``OUTPUT_DIR``.

    The samples are converted to float32 and clipped to ``[-1, 1]`` before
    being written. The file name carries a microsecond-resolution timestamp so
    that two clips saved within the same second do not overwrite each other.
    The time spent saving is printed through ``log_time``.

    Args:
        audio_array: NumPy array of audio samples.
        sample_rate: Sample rate passed to the WAV writer.
        prefix: Leading part of the file name. The hourly cleanup only matches
            files whose name starts with ``audio_``.

    Returns:
        Path of the file that was written.

    Raises:
        Exception: Any error raised while converting or writing is printed and
            then re-raised unchanged.
    """
    try:
        start = time.time()
        # Convert to float32 and keep every sample inside [-1, 1].
        audio_array = np.clip(audio_array.astype(np.float32), -1, 1)

        # %f (microseconds) keeps names unique for clips saved in the same second.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        filename = os.path.join(OUTPUT_DIR, f"{prefix}_{timestamp}.wav")
        write_wav(filename, sample_rate, audio_array)
        log_time(start, "Audio saving")
        return filename

    except Exception as e:
        print(f"Error saving audio file: {str(e)}")
        raise
100
+
101
def generate_speech(text, voice_preset="v2/en_speaker_6"):
    """Generate speech for *text* and store it as a WAV file.

    Args:
        text: Text to synthesize.
        voice_preset: Voice preset identifier handed to ``create_bark_audio``.

    Returns:
        Path of the WAV file produced by ``save_audio``.

    Raises:
        Exception: Any error from generation or saving is printed and then
            re-raised unchanged.
    """
    if torch.cuda.is_available():
        target_device = "cuda"
    else:
        target_device = "cpu"

    try:
        waveform, rate = create_bark_audio(text, voice_preset, target_device)
        return save_audio(waveform, rate)
    except Exception as e:
        print(f"An error occurred: {e!s}")
        raise
111
+
112
if __name__ == "__main__":
    # Manual smoke test: synthesize one fixed sentence with the default voice
    # and report where the file was written.
    sample_text = "my cat is very cute"
    output_path = generate_speech(sample_text)
    print(f"Audio saved as: {output_path}")
public/AkashLogo.svg ADDED
public/favicon.ico ADDED
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ git+https://github.com/huggingface/transformers.git
2
+ git+https://github.com/huggingface/accelerate
3
+ git+https://github.com/huggingface/optimum.git
4
+ git+https://github.com/suno-ai/bark.git
5
+ torch
6
+ scipy
7
+ numpy
8
+ gradio
9
+ apscheduler