# ==================================
# 2) IMPORT LIBRARIES
# ==================================
import os
import tempfile

import gradio as gr
import whisper
from zyphra import ZyphraClient  # Assumes the Zyphra package provides this client

# Name of the environment variable that must hold the Zyphra API key.
# SECURITY: a key used to be hard-coded in this file; treat that key as
# compromised and rotate it. Secrets must never be committed to source.
ZYPHRA_API_KEY_ENV = "ZYPHRA_API_KEY"

# ==================================
# 3) LOAD WHISPER MODEL
# ==================================
model = whisper.load_model("base")


# ==================================
# 4) DEFINE PROCESSING FUNCTION
# ==================================
def _resolve_media_path(media_file):
    """Return a filesystem path string for the uploaded media.

    Depending on the Gradio version/configuration, the File component may
    hand over either a path string or a temp-file-like object exposing the
    path as ``.name``. Strings have no ``.name`` attribute, so they are
    returned unchanged.
    """
    return getattr(media_file, "name", media_file)


def _read_api_key():
    """Fetch the Zyphra API key from the environment.

    Raises:
        RuntimeError: if the environment variable is missing or empty.
    """
    api_key = os.environ.get(ZYPHRA_API_KEY_ENV)
    if not api_key:
        raise RuntimeError(
            f"Zyphra API key not configured; set the {ZYPHRA_API_KEY_ENV} "
            "environment variable."
        )
    return api_key


def process_media(media_file):
    """Translate uploaded media to English and synthesize it as speech.

    Steps:
      1. Transcribe and translate the uploaded audio/video into English
         using Whisper (``task="translate"``).
      2. Send the English text to Zyphra's TTS endpoint through the
         synchronous ``ZyphraClient``.
      3. Write the returned audio bytes to a temporary ``.wav`` file.

    Args:
        media_file: The upload from the Gradio File component (a path
            string or an object with a ``.name`` path), or ``None`` when
            nothing was uploaded.

    Returns:
        A ``(audio_path, text)`` tuple. On success ``audio_path`` is the
        temporary audio file and ``text`` is the English transcription.
        On any failure ``audio_path`` is ``None`` and ``text`` is an
        ``"Error: ..."`` message suitable for display in the UI.
    """
    if media_file is None:
        return None, "Error: no media file was uploaded."

    # Broad catch is deliberate: this is the UI boundary, and any failure
    # should surface as a message in the textbox rather than a stack trace.
    try:
        media_path = _resolve_media_path(media_file)

        # Transcribe and translate the media into English
        result = model.transcribe(media_path, task="translate")
        english_transcription = result["text"]

        if not english_transcription.strip():
            # Nothing to synthesize; avoid a pointless TTS request.
            return None, "Error: no speech was detected in the uploaded media."

        # ==================================
        # Zyphra TTS API CALL using ZyphraClient
        # ==================================
        with ZyphraClient(api_key=_read_api_key()) as client:
            # Get audio bytes for the given text; adjust speaking_rate if
            # desired. WAV is requested explicitly so the bytes match the
            # ".wav" suffix used below (the service otherwise returns its
            # default container, documented as WebM).
            audio_data = client.audio.speech.create(
                text=english_transcription,
                speaking_rate=15,
                mime_type="audio/wav",
            )

        # delete=False: Gradio must be able to read the file after we return.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            temp_audio.write(audio_data)
            synthesized_audio = temp_audio.name

        return synthesized_audio, english_transcription
    except Exception as e:
        print("Error during processing:", e)
        return None, f"Error: {str(e)}"


# ==================================
# 5) BUILD GRADIO INTERFACE
# ==================================
interface = gr.Interface(
    fn=process_media,
    inputs=gr.File(label="Upload Audio or Video", file_types=["audio", "video"]),
    outputs=[
        gr.Audio(type="filepath", label="Synthesized English Audio"),
        gr.Textbox(label="English Subtitles"),
    ],
    title="Multilingual Media to English TTS Pipeline (Zyphra)",
    description=(
        "Upload an audio or video file in any language. The file is transcribed and translated into "
        "English using Whisper, then converted to speech via the Zyphra TTS service using ZyphraClient."
    ),
)

# ==================================
# 6) LAUNCH THE APP
# ==================================
interface.launch(debug=True)