Bhavibond committed on
Commit
8faecdb
·
verified ·
1 Parent(s): b3cdb51

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import torch
4
+
5
# Build the three Hugging Face pipelines once, at import time, so that every
# request handled by the app reuses the already-loaded models.

# Speech -> text.
transcriber = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-tiny",
)

# Text -> text in another language.
translator = pipeline(
    "translation",
    model="facebook/m2m100_418M",
)

# Text -> speech, pinned to the CPU.
# NOTE(review): this checkpoint is published under the ESPnet organisation;
# confirm that the transformers "text-to-speech" pipeline can actually load it.
tts = pipeline(
    "text-to-speech",
    model="espnet/kan-bayashi_ljspeech_vits",
    device=torch.device("cpu"),
)
9
+
10
+ # Function to handle transcription, translation, and TTS
11
def process_audio(audio, target_lang):
    """Transcribe an audio file, translate the text, and synthesize speech.

    Args:
        audio: Path of the uploaded audio file, which is what
            ``gr.Audio(type="filepath")`` passes to the callback. A dict
            carrying the path under the ``"name"`` key is still accepted for
            backward compatibility. ``None`` means nothing was uploaded.
        target_lang: Language code to translate into (for example ``"hi"``).
            ``"en"`` skips the translation step.

    Returns:
        A 3-tuple ``(text, speech, braille_path)``:

        * ``text`` - the translated text (the transcription itself when
          ``target_lang`` is ``"en"``), or a user-facing message when the
          input is missing or processing failed.
        * ``speech`` - ``(sampling_rate, waveform)`` for the audio output
          component, or ``None`` when there is nothing to play.
        * ``braille_path`` - path of the ``.brf`` text file that was written,
          or ``None``.

    The function never raises: it is a UI callback, so any failure is
    reported through the text output instead.
    """
    if audio is None:
        return "Please upload an audio file.", None, None

    # An empty dropdown arrives as None; asking the translator for a "None"
    # target language would only produce an obscure error message.
    if not target_lang:
        return "Please select a target language.", None, None

    try:
        # With type="filepath" the audio component hands over a plain path
        # string, so indexing it with ["name"] would raise TypeError.
        audio_path = audio["name"] if isinstance(audio, dict) else audio

        # Transcribe
        transcription = transcriber(audio_path)["text"]

        # Translate (the transcription is assumed to be English)
        if target_lang != "en":
            translated_text = translator(
                transcription, src_lang="en", tgt_lang=target_lang
            )[0]["translation_text"]
        else:
            translated_text = transcription

        # Generate speech from the translated text. The text-to-speech
        # pipeline returns a dict holding the waveform and its sampling rate,
        # while the audio output component expects (sampling_rate, waveform).
        speech = tts(translated_text)
        waveform = speech["audio"]
        # Presumably some models return a leading batch axis, (1, n_samples);
        # drop it so the samples are not mistaken for channels.
        if getattr(waveform, "ndim", 1) > 1:
            waveform = waveform.squeeze()

        # Create Braille-compatible text file
        braille_file = "output.brf"
        with open(braille_file, "w", encoding="utf-8") as f:
            f.write(translated_text)

        return translated_text, (speech["sampling_rate"], waveform), braille_file

    except Exception as e:
        # Top-level UI boundary: surface the failure to the user rather than
        # crashing the request.
        return f"Error: {e}", None, None
37
+
38
# Input components: the audio to process (handed to the callback as a file
# path) and the language code to translate into.
inputs = [
    gr.Audio(type="filepath"),
    gr.Dropdown(
        label="Target Language",
        choices=["hi", "te", "kn", "en", "hu"],
    ),
]

# Output components, in the order of the callback's return tuple:
# translated text, synthesized speech, downloadable text file.
outputs = [
    gr.Text(label="Translated Text"),
    gr.Audio(label="Generated Speech"),
    gr.File(label="Download Braille-Compatible File"),
]

# Wire the callback and the components into a single-page interface.
app = gr.Interface(
    process_audio,
    inputs,
    outputs,
    title="Multi-Language Voice Translator",
    description=(
        "Transcribe, translate, and generate speech in multiple languages "
        "with accessibility features."
    ),
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()