Spaces:

LonewolfT141
/

Text_To_Speech_Model

Sleeping

App Files Files Community

LonewolfT141 committed on Mar 16, 2025

Commit

1166ec4

verified ·

1 Parent(s): 857da1c

Create app.py

Browse files

Files changed (1) hide show

app.py +74 -0

app.py ADDED Viewed

	@@ -0,0 +1,74 @@

+# ==================================
+# 2) IMPORT LIBRARIES
+# ==================================
+import gradio as gr
+import whisper
+import tempfile
+from zyphra import ZyphraClient  # Assumes the Zyphra package provides this client
+# ==================================
+# 3) LOAD WHISPER MODEL (runs once, when this module is executed)
+# ==================================
+model = whisper.load_model("base")  # module-level global; process_media() calls model.transcribe()
+# ==================================
+# 4) DEFINE PROCESSING FUNCTION
+# ==================================
+def process_media(media_file):
+    """
+    This function:
+      - Transcribes and translates the uploaded audio/video into English using Whisper.
+      - Uses ZyphraClient (synchronous) to convert the English text to speech.
+      - Returns both the synthesized audio and the English subtitles.
+    """
+    try:
+        # Transcribe and translate the media into English
+        result = model.transcribe(media_file, task="translate")
+        english_transcription = result["text"]
+        # ==================================
+        # Zyphra TTS API CALL using ZyphraClient
+        # ==================================
+        api_key = "zsk-c8741b6d61d76f872442699c84ed180e98f43b2b2cf4ed8f8c8da72c70fcfbb3"
+        with ZyphraClient(api_key=api_key) as client:
+            # Get audio bytes for the given text; adjust speaking_rate if desired.
+            audio_data = client.audio.speech.create(
+                text=english_transcription,
+                speaking_rate=15
+            )
+        # Write the returned audio data to a temporary file
+        temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
+        temp_audio.write(audio_data)
+        temp_audio.close()
+        synthesized_audio = temp_audio.name
+        return synthesized_audio, english_transcription
+    except Exception as e:
+        print("Error during processing:", e)
+        return None, f"Error: {str(e)}"
+# ==================================
+# 5) BUILD GRADIO INTERFACE
+# ==================================
+interface = gr.Interface(
+    fn=process_media,
+    inputs=gr.File(label="Upload Audio or Video", file_types=["audio", "video"]),
+    outputs=[
+        gr.Audio(type="filepath", label="Synthesized English Audio"),
+        gr.Textbox(label="English Subtitles")
+    ],
+    title="Multilingual Media to English TTS Pipeline (Zyphra)",
+    description=(
+        "Upload an audio or video file in any language. The file is transcribed and translated into "
+        "English using Whisper, then converted to speech via the Zyphra TTS service using ZyphraClient."
+    )
+)
+# ==================================
+# 6) LAUNCH THE APP (module-level call; there is no __main__ guard)
+# ==================================
+interface.launch(debug=True)  # starts the Gradio interface built above with debug=True