LonewolfT141 committed on
Commit
1166ec4
·
verified ·
1 Parent(s): 857da1c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -0
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ # ==================================
4
+ # 2) IMPORT LIBRARIES
5
+ # ==================================
6
+ import gradio as gr
7
+ import whisper
8
+ import tempfile
9
+ from zyphra import ZyphraClient # Assumes the Zyphra package provides this client
10
+
11
+ # ==================================
12
+ # 3) LOAD WHISPER MODEL
13
+ # ==================================
14
# Load the "base" Whisper checkpoint once, at import time, so that every call
# to process_media reuses the same module-level model object.
model = whisper.load_model(name="base")
15
+
16
+ # ==================================
17
+ # 4) DEFINE PROCESSING FUNCTION
18
+ # ==================================
19
def process_media(media_file):
    """Translate uploaded media into English text and synthesize it as speech.

    The media is transcribed and translated into English with the module-level
    Whisper ``model``. The resulting text is sent to the Zyphra TTS service
    through ``ZyphraClient`` and the returned audio is written to a temporary
    file.

    The Zyphra API key is read from the ``ZYPHRA_API_KEY`` environment
    variable. It must never be hard-coded: a key stored in the source is
    exposed to everyone who can read the repository.

    Args:
        media_file: Path of the uploaded audio/video file, or an object that
            exposes that path through a ``name`` attribute.

    Returns:
        A ``(audio_path, text)`` tuple. On success ``audio_path`` is the path
        of the synthesized audio file and ``text`` is the English
        transcription. On any failure ``audio_path`` is ``None`` and ``text``
        is a message starting with ``"Error: "``.
    """
    import os  # local import: keeps this function self-contained

    # Nothing uploaded (e.g. the user pressed submit with an empty form).
    if not media_file:
        return None, "Error: no media file was provided."

    # Fail fast, before the expensive transcription, when no key is configured.
    api_key = os.environ.get("ZYPHRA_API_KEY")
    if not api_key:
        return None, "Error: ZYPHRA_API_KEY environment variable is not set."

    # NOTE(review): the upload is assumed to arrive either as a plain path
    # string or as a file wrapper with a ``.name`` path -- confirm against the
    # installed Gradio version.
    media_path = getattr(media_file, "name", media_file)

    try:
        # Transcribe and translate the media into English.
        result = model.transcribe(media_path, task="translate")
        english_transcription = result["text"]

        # Ask the Zyphra TTS service for the spoken version of the text;
        # adjust speaking_rate if desired.
        with ZyphraClient(api_key=api_key) as client:
            audio_data = client.audio.speech.create(
                text=english_transcription,
                speaking_rate=15,
            )

        # delete=False: the file has to outlive this function because only its
        # path is returned. The ``with`` block guarantees the handle is closed
        # even if the write fails.
        # NOTE(review): the ".wav" suffix assumes the service returns WAV
        # data -- confirm the actual audio format.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            temp_audio.write(audio_data)

        return temp_audio.name, english_transcription

    except Exception as e:
        # UI boundary: report the failure in the text output instead of
        # crashing the request.
        print("Error during processing:", e)
        return None, f"Error: {str(e)}"
53
+
54
+ # ==================================
55
+ # 5) BUILD GRADIO INTERFACE
56
+ # ==================================
57
# Input widget: a single upload restricted to audio and video files.
_media_upload = gr.File(
    label="Upload Audio or Video",
    file_types=["audio", "video"],
)

# Output widgets, in the same order as the values returned by process_media:
# first the synthesized audio file path, then the English text.
_result_widgets = [
    gr.Audio(type="filepath", label="Synthesized English Audio"),
    gr.Textbox(label="English Subtitles"),
]

# Text shown under the title of the web page.
_app_description = (
    "Upload an audio or video file in any language. The file is transcribed and translated into "
    "English using Whisper, then converted to speech via the Zyphra TTS service using ZyphraClient."
)

# Wire the processing function to its input and output widgets.
interface = gr.Interface(
    fn=process_media,
    inputs=_media_upload,
    outputs=_result_widgets,
    title="Multilingual Media to English TTS Pipeline (Zyphra)",
    description=_app_description,
)
70
+
71
+ # ==================================
72
+ # 6) LAUNCH THE APP
73
+ # ==================================
74
# Start the web server only when this file is executed as a script, so that
# importing the module (for tests or reuse of process_media) has no side effect.
if __name__ == "__main__":
    interface.launch(debug=True)