Senath committed on
Commit
5ca0193
·
verified ·
1 Parent(s): 61e4fb2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -38
app.py CHANGED
@@ -1,53 +1,45 @@
1
  import gradio as gr
2
- import torch
3
  import torchaudio
 
4
  from transformers import AutoProcessor, SeamlessM4TModel
5
 
6
- # Load the processor and model
7
- processor = AutoProcessor.from_pretrained("facebook/hf-seamless-m4t-medium")
8
  model = SeamlessM4TModel.from_pretrained("facebook/hf-seamless-m4t-medium")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- # Define the translation function
11
- def translate(input_text, input_audio, source_lang, target_lang):
12
- outputs = {}
13
- if input_text:
14
- # Process text input
15
- text_inputs = processor(text=input_text, src_lang=source_lang, return_tensors="pt")
16
- generated = model.generate(**text_inputs, tgt_lang=target_lang)
17
- outputs["Translated Text"] = processor.decode(generated[0], skip_special_tokens=True)
18
- # Generate speech from text
19
- audio_output = model.generate(**text_inputs, tgt_lang=target_lang, generate_speech=True)
20
- outputs["Translated Audio"] = (16000, audio_output[0].cpu().numpy())
21
- elif input_audio:
22
- # Process audio input
23
- waveform, sample_rate = torchaudio.load(input_audio)
24
- if sample_rate != 16000:
25
- waveform = torchaudio.functional.resample(waveform, orig_freq=sample_rate, new_freq=16000)
26
- audio_inputs = processor(audios=waveform.squeeze().numpy(), return_tensors="pt")
27
- generated = model.generate(**audio_inputs, tgt_lang=target_lang)
28
- outputs["Translated Text"] = processor.decode(generated[0], skip_special_tokens=True)
29
- # Generate speech from audio
30
- audio_output = model.generate(**audio_inputs, tgt_lang=target_lang, generate_speech=True)
31
- outputs["Translated Audio"] = (16000, audio_output[0].cpu().numpy())
32
- else:
33
- outputs["Error"] = "Please provide either text or audio input."
34
- return outputs.get("Translated Text", ""), outputs.get("Translated Audio", None)
35
 
36
- # Define the Gradio interface
37
- iface = gr.Interface(
38
  fn=translate,
39
  inputs=[
40
- gr.Textbox(label="Input Text"),
41
- gr.Audio(source="upload", type="filepath", label="Input Audio"),
42
- gr.Textbox(label="Source Language (e.g., 'eng')"),
43
- gr.Textbox(label="Target Language (e.g., 'hin')")
44
  ],
45
  outputs=[
46
  gr.Textbox(label="Translated Text"),
47
- gr.Audio(label="Translated Audio")
48
  ],
49
- title="iVoice Seamless Translator",
50
- description="Translate text or speech into another language with both text and speech outputs."
51
  )
52
 
53
- iface.launch()
 
 
1
  import gradio as gr
 
2
  import torchaudio
3
+ import torch
4
  from transformers import AutoProcessor, SeamlessM4TModel
5
 
6
# Load the SeamlessM4T weights and the matching processor once, at import
# time. Both come from the same checkpoint, so the id is spelled out once.
_CHECKPOINT = "facebook/hf-seamless-m4t-medium"

model = SeamlessM4TModel.from_pretrained(_CHECKPOINT)
processor = AutoProcessor.from_pretrained(_CHECKPOINT)
9
+
10
_SAMPLE_RATE = 16000  # rate audio is resampled to before inference and reported for the output


def _run_model(model_inputs, target_lang):
    """Run one generation pass and return ``(translated_text, waveform)``.

    ``return_intermediate_token_ids=True`` asks ``generate`` to hand back the
    intermediate text tokens together with the synthesized speech, so the
    text and the audio come from a single pass.
    """
    with torch.inference_mode():  # inference only; skip autograd bookkeeping
        output = model.generate(
            **model_inputs,
            tgt_lang=target_lang,
            return_intermediate_token_ids=True,
        )
    text = processor.decode(output.sequences[0].tolist(), skip_special_tokens=True)
    # Gradio's Audio output wants a NumPy array, not a torch tensor.
    waveform = output.waveform.cpu().numpy().squeeze()
    return text, waveform


def translate(text_input, audio_file, target_lang):
    """Translate text and/or an audio file into ``target_lang``.

    Args:
        text_input: Text to translate; ignored when empty or whitespace-only.
            No source language is passed, so the processor's default applies
            (presumably English -- confirm against the tokenizer config).
        audio_file: Path of an audio file to translate; ignored when falsy.
        target_lang: Target language code forwarded to ``model.generate``
            as ``tgt_lang`` (e.g. ``"hin"``).

    Returns:
        A ``(text, audio)`` pair for the two Gradio outputs. ``text`` holds
        one labelled line per processed input. ``audio`` is
        ``(16000, numpy_waveform)`` taken from the first processed input
        (text wins when both are given), or ``None`` when nothing was
        provided so the audio widget simply stays empty.
    """
    lines = []
    speech = None

    if text_input and text_input.strip():
        text_inputs = processor(text=text_input, return_tensors="pt")
        translated, waveform = _run_model(text_inputs, target_lang)
        lines.append(f"Translated from text: {translated}")
        speech = waveform

    if audio_file:
        audio_waveform, sr = torchaudio.load(audio_file)
        if audio_waveform.size(0) > 1:
            # Down-mix multi-channel uploads; keep the (1, frames) layout.
            audio_waveform = audio_waveform.mean(dim=0, keepdim=True)
        if sr != _SAMPLE_RATE:
            audio_waveform = torchaudio.functional.resample(
                audio_waveform, sr, _SAMPLE_RATE
            )
        audio_inputs = processor(
            audios=audio_waveform,
            sampling_rate=_SAMPLE_RATE,
            return_tensors="pt",
        )
        translated, waveform = _run_model(audio_inputs, target_lang)
        lines.append(f"Translated from audio: {translated}")
        if speech is None:
            speech = waveform

    if speech is None:
        # Nothing to translate: return no audio instead of (rate, None),
        # which the Audio output component cannot render.
        return "Please provide either text or audio input.", None

    return "\n".join(lines), (_SAMPLE_RATE, speech)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
# Input widgets, listed in the positional order translate() receives them.
_text_box = gr.Textbox(
    label="Input Text",
    placeholder="Enter text to translate (optional)",
)
_audio_upload = gr.Audio(type="filepath", label="Input Audio (optional)")
_language_picker = gr.Dropdown(
    choices=["eng", "hin", "spa", "fra", "por"],
    label="Target Language",
    value="hin",
)

# Output widgets, matching the (text, audio) pair translate() returns.
_text_result = gr.Textbox(label="Translated Text")
_speech_result = gr.Audio(label="Translated Speech")

demo = gr.Interface(
    fn=translate,
    inputs=[_text_box, _audio_upload, _language_picker],
    outputs=[_text_result, _speech_result],
    title="SeamlessM4T Translation",
    description="Enter text or audio, choose a target language, and get translation + speech.",
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()