Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -22,15 +22,7 @@ tts = TextToSpeech(kv_cache=True, use_deepspeed=True, half=True)
|
|
| 22 |
|
| 23 |
def convert_audio(filepath, voice="indian_F_1"):
|
| 24 |
# Transcribe audio to text using STT
|
| 25 |
-
|
| 26 |
-
filepath,
|
| 27 |
-
max_new_tokens=256,
|
| 28 |
-
generate_kwargs={
|
| 29 |
-
"task": "transcribe",
|
| 30 |
-
"language": "english",
|
| 31 |
-
},
|
| 32 |
-
chunk_length_s=30,
|
| 33 |
-
batch_size=8
|
| 34 |
)
|
| 35 |
transcribed_text = transcription_output["text"]
|
| 36 |
|
|
@@ -42,15 +34,14 @@ def convert_audio(filepath, voice="indian_F_1"):
|
|
| 42 |
for audio_frame in tts.tts_with_preset(
|
| 43 |
text,
|
| 44 |
voice_samples=voice_samples,
|
| 45 |
-
|
| 46 |
-
preset="ultra_fast",
|
| 47 |
k=1
|
| 48 |
):
|
| 49 |
audio_frames.append(audio_frame.cpu().detach().numpy())
|
| 50 |
|
| 51 |
# Joining the audio frames for output using numpy's concatenate
|
| 52 |
final_audio = np.concatenate(audio_frames, axis=0)
|
| 53 |
-
|
| 54 |
|
| 55 |
interface = gr.Interface(
|
| 56 |
fn=convert_audio,
|
|
|
|
| 22 |
|
| 23 |
def convert_audio(filepath, voice="indian_F_1"):
|
| 24 |
# Transcribe audio to text using STT
|
| 25 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
)
|
| 27 |
transcribed_text = transcription_output["text"]
|
| 28 |
|
|
|
|
| 34 |
for audio_frame in tts.tts_with_preset(
|
| 35 |
text,
|
| 36 |
voice_samples=voice_samples,
|
| 37 |
+
|
|
|
|
| 38 |
k=1
|
| 39 |
):
|
| 40 |
audio_frames.append(audio_frame.cpu().detach().numpy())
|
| 41 |
|
| 42 |
# Joining the audio frames for output using numpy's concatenate
|
| 43 |
final_audio = np.concatenate(audio_frames, axis=0)
|
| 44 |
+
|
| 45 |
|
| 46 |
interface = gr.Interface(
|
| 47 |
fn=convert_audio,
|