Senath committed on
Commit
48bd16f
·
verified ·
1 Parent(s): e8bbdcb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -29
app.py CHANGED
@@ -12,39 +12,44 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
12
  processor = AutoProcessor.from_pretrained(MODEL_NAME)
13
  model = SeamlessM4TModel.from_pretrained(MODEL_NAME).to(device).eval()
14
 
15
- # Gradio-compatible function using single input list (required for /run/predict to work)
16
- def translate(inputs):
17
- try:
18
- text_input, audio_input, source_lang, target_lang, auto_detect = inputs
19
- src = None if auto_detect else source_lang
20
- translated_text = ""
21
- translated_audio = None
22
 
23
- if text_input:
24
- proc_inputs = processor(text=text_input, src_lang=src, return_tensors="pt").to(device)
25
- speech = model.generate(**proc_inputs, tgt_lang=target_lang)[0].cpu().numpy().squeeze()
26
- text_tokens = model.generate(**proc_inputs, tgt_lang=target_lang, generate_speech=False)
27
- translated_text = processor.decode(text_tokens[0].tolist()[0], skip_special_tokens=True)
28
- translated_audio = (16000, speech)
29
 
30
- elif audio_input:
31
- waveform, sr = torchaudio.load(audio_input)
32
- waveform = torchaudio.functional.resample(waveform, sr, 16000)
33
- proc_inputs = processor(audios=waveform, src_lang=src, return_tensors="pt").to(device)
34
- speech = model.generate(**proc_inputs, tgt_lang=target_lang)[0].cpu().numpy().squeeze()
35
- text_tokens = model.generate(**proc_inputs, tgt_lang=target_lang, generate_speech=False)
36
- translated_text = processor.decode(text_tokens[0].tolist()[0], skip_special_tokens=True)
37
- translated_audio = (16000, speech)
38
 
39
- else:
40
- translated_text = "No input provided."
41
 
42
- return [translated_text, translated_audio]
 
 
 
 
43
 
44
- except Exception as e:
45
- return [f"Error: {str(e)}", None]
 
 
 
 
 
 
 
 
 
 
46
 
47
- # Gradio Interface
48
  iface = gr.Interface(
49
  fn=translate,
50
  inputs=[
@@ -52,7 +57,7 @@ iface = gr.Interface(
52
  gr.Audio(type="filepath", label="Input Audio (optional)"),
53
  gr.Textbox(label="Source Language (e.g. eng)"),
54
  gr.Textbox(label="Target Language (e.g. fra)"),
55
- gr.Checkbox(label="Auto-detect source language", value=True)
56
  ],
57
  outputs=[
58
  gr.Textbox(label="Translated Text"),
@@ -61,4 +66,4 @@ iface = gr.Interface(
61
  title="iVoice Translate (Text + Speech)"
62
  ).queue()
63
 
64
- iface.launch()
 
12
  processor = AutoProcessor.from_pretrained(MODEL_NAME)
13
  model = SeamlessM4TModel.from_pretrained(MODEL_NAME).to(device).eval()
14
 
15
+ # Main translation function
16
+ def translate(text_input, audio_input, source_lang, target_lang, auto_detect):
17
+ src = None if auto_detect else source_lang
18
+ translated_text = None
19
+ translated_audio = None
 
 
20
 
21
+ # If text is provided
22
+ if text_input:
23
+ inputs = processor(text=text_input, src_lang=src, return_tensors="pt").to(device)
24
+
25
+ # Generate speech
26
+ speech = model.generate(**inputs, tgt_lang=target_lang)[0].cpu().numpy().squeeze()
27
 
28
+ # Generate text
29
+ text_tokens = model.generate(**inputs, tgt_lang=target_lang, generate_speech=False)
30
+ translated_text = processor.decode(text_tokens[0].tolist()[0], skip_special_tokens=True)
 
 
 
 
 
31
 
32
+ translated_audio = (16000, speech)
 
33
 
34
+ # If audio is provided
35
+ elif audio_input:
36
+ waveform, sr = torchaudio.load(audio_input)
37
+ waveform = torchaudio.functional.resample(waveform, sr, 16000)
38
+ inputs = processor(audios=waveform, src_lang=src, return_tensors="pt").to(device)
39
 
40
+ # Generate speech
41
+ speech = model.generate(**inputs, tgt_lang=target_lang)[0].cpu().numpy().squeeze()
42
+
43
+ # Generate text
44
+ text_tokens = model.generate(**inputs, tgt_lang=target_lang, generate_speech=False)
45
+ translated_text = processor.decode(text_tokens[0].tolist()[0], skip_special_tokens=True)
46
+
47
+ translated_audio = (16000, speech)
48
+
49
+ if translated_text or translated_audio:
50
+ return translated_text or "", translated_audio
51
+ return "No input provided.", None
52
 
 
53
  iface = gr.Interface(
54
  fn=translate,
55
  inputs=[
 
57
  gr.Audio(type="filepath", label="Input Audio (optional)"),
58
  gr.Textbox(label="Source Language (e.g. eng)"),
59
  gr.Textbox(label="Target Language (e.g. fra)"),
60
+ gr.Checkbox(label="Auto-detect source language")
61
  ],
62
  outputs=[
63
  gr.Textbox(label="Translated Text"),
 
66
  title="iVoice Translate (Text + Speech)"
67
  ).queue()
68
 
69
+ iface.launch() # <== Add this line and remove the entire if __name__ == "__main__"