Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import whisper | |
| from translate import Translator | |
| from TTS.api import TTS | |
| import uuid | |
| import os | |
| from pathlib import Path | |
| import gc | |
| import torch | |
# Pre-accept Coqui's CPML license so XTTS can load non-interactively.
os.environ["COQUI_TOS_AGREED"] = "1"
# Whisper "base" model: used for both transcription and language detection.
model = whisper.load_model("base")
# Multilingual XTTS v2: synthesizes speech, cloning the speaker's voice.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
# All generated .wav files are written here.
output_dir = "output_audio"
os.makedirs(output_dir, exist_ok=True)
def transcribeaudio(audiofile):
    """Transcribe an audio file with Whisper and detect its spoken language.

    Returns ``{"text": ..., "language": ...}`` on success, or
    ``{"status": "error", "error": ...}`` when transcription fails.
    """
    print("Transcribing audio...")
    result = model.transcribe(audiofile)
    if "text" not in result:
        print("Transcription failed.")
        return {"status": "error", "error": "Transcription failed"}
    # Run language detection on a 30-second mel spectrogram of the clip.
    samples = whisper.pad_or_trim(whisper.load_audio(audiofile))
    mel = whisper.log_mel_spectrogram(samples).to(model.device)
    _, probs = model.detect_language(mel)
    detected_language = max(probs, key=probs.get)
    print(f"Detected language: {detected_language}")
    return {"text": result["text"], "language": detected_language}
def translatetext(text, source_lang, target_lang):
    """Translate *text* between language codes; return an error string on failure."""
    try:
        translated_text = Translator(from_lang=source_lang, to_lang=target_lang).translate(text)
    except Exception as e:
        # Best-effort: surface the failure as a string rather than raising.
        print(f"Error translating to {target_lang}: {str(e)}")
        return f"Error: Could not translate to {target_lang}"
    print(f"Translated text: {translated_text}")
    return translated_text
def readtranslation(text, audiofile, language):
    """Synthesize *text* in *language*, cloning the voice from *audiofile*.

    Returns the path of the generated .wav (unique name per call).
    """
    wav_path = os.path.join(output_dir, f"{language}_{uuid.uuid4()}.wav")
    print(f"Generating TTS for text: {text}")
    tts.tts_to_file(
        text=text,
        file_path=wav_path,
        speaker_wav=audiofile,
        language=language,
    )
    print(f"Generated audio file at: {wav_path}")
    return wav_path
def v2vtranslate(audiofile, selected_lang, COQUI_TOS_AGREED, progress=gr.Progress()):
    """Voice-to-voice pipeline: transcribe, translate, then re-synthesize.

    Args:
        audiofile: path to the recorded input audio.
        selected_lang: target language code for the output speech.
        COQUI_TOS_AGREED: checkbox state; the pipeline refuses to run unless True.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        (audio_path, translated_text) matching the two UI output components,
        or (None, None) when the terms were not accepted.
    """
    if not COQUI_TOS_AGREED:
        gr.Warning("Please accept the Terms & Condition!")
        # BUG FIX: the click handler has exactly two output components, so
        # return two values (the original returned a 4-tuple, which Gradio
        # rejects with a "too many output values" error).
        return None, None
    progress(0, desc="Starting process...")
    try:
        progress(0.2, desc="Transcribing audio...")
        transcription_result = transcribeaudio(audiofile)
        if isinstance(transcription_result, dict) and transcription_result.get("status") == "error":
            raise gr.Error(transcription_result["error"])
        text = transcription_result["text"]
        detected_language = transcription_result["language"]
        progress(0.4, desc="Translating text...")
        translated_text = translatetext(text, detected_language, selected_lang)
        progress(0.7, desc="Generating audio...")
        audio_path = readtranslation(translated_text, audiofile, selected_lang)
        progress(1.0, desc="Process complete!")
        return audio_path, translated_text
    except gr.Error:
        # Already a user-facing error — don't double-wrap its message.
        raise
    except Exception as e:
        raise gr.Error(f"An error occurred: {str(e)}") from e
    finally:
        # Always reclaim memory, whether the request succeeded or failed.
        cleanup_memory()
# Gradio UI: one column of inputs (recorder, language picker, TOS checkbox,
# buttons) above a row with the translated audio and text outputs.
with gr.Blocks() as demo:
    gr.Markdown("## Record yourself in any language and immediately receive voice translations.")
    with gr.Row():
        with gr.Column():
            # Microphone recorder, capped at 15 s to keep synthesis latency low.
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                show_download_button=True,
                max_length=15,
                label="Record your voice",
                waveform_options=gr.WaveformOptions(
                    waveform_color="#01C6FF",
                    waveform_progress_color="#0066B4",
                    skip_length=2,
                    show_controls=False,
                ),
            )
            # Target languages — the language codes accepted by XTTS v2.
            language_gr = gr.Dropdown(
                label="Language",
                info="Select an output language for the synthesised speech",
                choices=[
                    "en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru",
                    "nl", "cs", "ar", "zh-cn", "ja", "ko", "hu", "hi",
                ],
                max_choices=1,
                value="es",
            )
            tos_gr = gr.Checkbox(
                label="Agree",
                value=False,
                info="I agree to the terms of the CPML: https://coqui.ai/cpml",
            )
            submit = gr.Button("Submit", variant="primary")
            reset = gr.Button("Reset")
    with gr.Row():
        output_audio = gr.Audio(label="Translated Audio", interactive=False)
        output_text = gr.Markdown()
    output_components = [output_audio, output_text]
    submit.click(
        fn=v2vtranslate,
        inputs=[audio_input, language_gr, tos_gr],
        outputs=output_components,
        show_progress=True,
    )
    # BUG FIX: three components are cleared here, so the handler must return
    # three values — the original `lambda: None` returned a single value,
    # which Gradio rejects for multi-output events.
    reset.click(
        fn=lambda: (None, None, None),
        inputs=None,
        outputs=output_components + [audio_input],
    )
def cleanup_memory():
    """Free Python garbage and return cached GPU memory to the CUDA driver.

    Called after every translation request to keep memory usage bounded
    between requests. Safe to call on CPU-only hosts.
    """
    gc.collect()
    # Only touch the CUDA allocator when a GPU is actually present; this
    # makes the CPU-only path explicit instead of relying on empty_cache()
    # being a silent no-op.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    print("Memory cleaned up")
# Start the Gradio server only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()