artificialguybr committed on
Commit
3219df3
·
1 Parent(s): 5ec4cf9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -27
app.py CHANGED
@@ -2,45 +2,43 @@ import gradio as gr
2
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
3
  from subprocess import run
4
  from faster_whisper import WhisperModel
5
- import soundfile as sf
6
- import numpy as np
7
  import json
 
8
 
9
-
10
# Load the mapping from human-readable language names to FLORES-200 codes.
with open('language_codes.json', 'r') as f:
    lang_codes = json.load(f)

# Initialize the NLLB translation model.
# BUG FIX: the two assignments were swapped — AutoTokenizer loads the
# tokenizer and AutoModelForSeq2SeqLM loads the seq2seq model. With the
# original code, tokenizer(...) and model.generate(...) both fail.
tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")

# Initialize the Whisper speech-to-text model.
# NOTE(review): device="cuda" assumes a GPU is present — confirm the
# deployment target, otherwise construction raises at import time.
model_size = "large-v2"
whisper_model = WhisperModel(model_size, device="cuda", compute_type="float16")
 
19
 
20
def process_video(radio, video, target_language, use_wav2lip):
    """Extract audio from *video*, transcribe it, translate the transcript,
    and burn the translation into the video as a subtitle.

    Parameters: ``radio`` (upload/record selector, unused here), ``video``
    (Gradio file object with a ``.name`` path), ``target_language``
    (human-readable name looked up in ``lang_codes``), ``use_wav2lip``
    (currently unused — presumably reserved for a lip-sync step; TODO confirm).
    Returns the path of the subtitled output video.
    """
    # 1. Extract the audio track with FFmpeg.
    # "-y" overwrites a stale audio.wav from a previous run; check=True
    # surfaces FFmpeg failures instead of continuing with no audio file.
    run(["ffmpeg", "-y", "-i", video.name, "audio.wav"], check=True)

    # 2. Transcribe with Whisper.
    segments, _ = whisper_model.transcribe("audio.wav")
    transcript = " ".join(segment.text for segment in segments)

    # 3. Translate with NLLB; default to English when the language is unknown.
    # BUG FIX: use the FLORES-200 code looked up from lang_codes instead of the
    # hard-coded 3-language dict (which raised KeyError for every other choice).
    flores_code = lang_codes.get(target_language, "eng_Latn")
    inputs = tokenizer(transcript, return_tensors="pt")
    translated_tokens = model.generate(
        **inputs,
        forced_bos_token_id=tokenizer.convert_tokens_to_ids(flores_code),
        max_length=100,  # NOTE(review): truncates long transcripts — confirm acceptable
    )
    translated_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]

    # BUG FIX: a premature `return` here made steps 4-5 unreachable, so the
    # function reported success without ever producing a subtitled video.
    # 4. Write a minimal single-cue SRT file. This is a simple placeholder:
    # the text could be split into timed cues in a later iteration.
    with open("subtitle.srt", "w") as f:
        f.write("1\n00:00:00,000 --> 00:00:10,000\n" + translated_text)

    # 5. Burn the subtitle into the video.
    run(
        ["ffmpeg", "-y", "-i", video.name, "-vf", "subtitles=subtitle.srt", "output_video.mp4"],
        check=True,
    )

    return "output_video.mp4"
 
 
 
 
 
 
 
 
 
44
 
45
  # Interface Gradio
46
  iface = gr.Interface(
@@ -49,8 +47,9 @@ iface = gr.Interface(
49
  gr.Radio(["Upload", "Record"], value="Upload", show_label=False),
50
  gr.Video(),
51
  gr.Dropdown(choices=list(lang_codes.keys()), label="Target Language for Dubbing", value="English"),
 
52
  ],
53
- outputs=gr.Textbox(),
54
  live=False,
55
  title="AI Video Dubbing"
56
  )
 
2
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
3
  from subprocess import run
4
  from faster_whisper import WhisperModel
 
 
5
  import json
6
+ import tempfile
7
 
8
# Load the mapping from human-readable language names to FLORES-200 codes
# (keys also populate the Gradio dropdown choices below).
with open('language_codes.json', 'r') as f:
    lang_codes = json.load(f)
 
 
 
11
 
12
# Load the NLLB-200 translation tokenizer/model and the Whisper ASR model once
# at import time, so every Gradio request reuses the same instances.
_NLLB_CHECKPOINT = "facebook/nllb-200-distilled-600M"
tokenizer = AutoTokenizer.from_pretrained(_NLLB_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(_NLLB_CHECKPOINT)
# NOTE(review): device="cuda" assumes a GPU is available — confirm deployment target.
whisper_model = WhisperModel("large-v2", device="cuda", compute_type="float16")
16
 
17
def process_video(radio, video, target_language, use_wav2lip):
    """Extract audio from *video*, transcribe it, translate the transcript,
    and burn the translation into the video as a subtitle.

    Parameters: ``radio`` (upload/record selector, unused here), ``video``
    (Gradio file object with a ``.name`` path), ``target_language``
    (human-readable name looked up in ``lang_codes``), ``use_wav2lip``
    (currently unused — presumably reserved for a lip-sync step; TODO confirm).
    Returns the path of the subtitled output video.
    """
    # 1. Extract audio.
    # BUG FIX: NamedTemporaryFile without delete=False removes the file as soon
    # as the unreferenced handle is garbage-collected, so FFmpeg races with the
    # deletion (and on Windows cannot open the still-held file at all).
    audio_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
    # "-y": the temp file already exists on disk, so FFmpeg must be allowed to
    # overwrite it; check=True surfaces FFmpeg failures immediately.
    run(["ffmpeg", "-y", "-i", video.name, audio_file], check=True)

    # 2. Transcribe.
    segments, _ = whisper_model.transcribe(audio_file)
    transcript = " ".join(segment.text for segment in segments)

    # 3. Translate; fall back to English when target_language is unknown.
    flores_code = lang_codes.get(target_language, "eng_Latn")
    inputs = tokenizer(transcript, return_tensors="pt")
    # BUG FIX: tokenizer.lang_code_to_id was removed from the NLLB tokenizer in
    # recent transformers releases; convert_tokens_to_ids is the stable way to
    # resolve the forced BOS language token.
    translated_tokens = model.generate(
        **inputs,
        forced_bos_token_id=tokenizer.convert_tokens_to_ids(flores_code),
        max_length=100,  # NOTE(review): truncates long transcripts — confirm acceptable
    )
    translated_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]

    # 4. Write a minimal single-cue SRT subtitle file (placeholder timing;
    # splitting into per-segment cues is a later improvement).
    subtitle_file = tempfile.NamedTemporaryFile(suffix=".srt", delete=False).name
    with open(subtitle_file, "w") as f:
        f.write("1\n00:00:00,000 --> 00:00:10,000\n" + translated_text)

    # 5. Burn the subtitle into the video ("-y" because the temp output
    # file already exists).
    output_video = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
    run(
        ["ffmpeg", "-y", "-i", video.name, "-vf", f"subtitles={subtitle_file}", output_video],
        check=True,
    )

    return output_video
42
 
43
  # Interface Gradio
44
  iface = gr.Interface(
 
47
  gr.Radio(["Upload", "Record"], value="Upload", show_label=False),
48
  gr.Video(),
49
  gr.Dropdown(choices=list(lang_codes.keys()), label="Target Language for Dubbing", value="English"),
50
+ gr.Checkbox(label="Video has a close-up face. Use Wav2lip.", value=False)
51
  ],
52
+ outputs=gr.Video(),
53
  live=False,
54
  title="AI Video Dubbing"
55
  )