artificialguybr committed on
Commit
2de3a57
·
1 Parent(s): da316b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -21
app.py CHANGED
@@ -1,39 +1,42 @@
1
  import gradio as gr
2
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
3
- from subprocess import run
4
  from faster_whisper import WhisperModel
5
  import json
6
  import tempfile
7
- import os # Importando o módulo os
8
- import ffmpeg
9
  from zipfile import ZipFile
10
  import stat
11
 
 
 
 
 
 
 
 
 
12
  ZipFile("ffmpeg.zip").extractall()
13
  st = os.stat('ffmpeg')
14
  os.chmod('ffmpeg', st.st_mode | stat.S_IEXEC)
15
- # Carregar mapeamento de idiomas
16
  with open('language_codes.json', 'r') as f:
17
  lang_codes = json.load(f)
18
 
19
- # Inicializar modelos
20
  tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
21
  model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
22
  whisper_model = WhisperModel("large-v2", device="cuda", compute_type="float16")
23
 
24
  def process_video(Video, target_language):
25
- print(type(Video))
26
  audio_file = tempfile.NamedTemporaryFile(suffix=".wav").name
27
- print(f"Running FFmpeg command: ffmpeg -i {Video} {audio_file}")
28
- run(["ffmpeg", "-i", Video, audio_file])
29
- print(f"Checking if temporary file exists: {os.path.exists(audio_file)}")
30
- print(f"Checking if video file exists: {os.path.exists(Video)}")
31
 
32
- # 2. Transcrição
33
- segments, _ = whisper_model.transcribe(audio_file, beam_size=5) # Usando audio_file
 
 
 
34
  segments = list(segments)
35
 
36
- # Criar o arquivo .srt com carimbos de tempo
37
  temp_transcript_file = tempfile.NamedTemporaryFile(delete=False, suffix=".srt")
38
  with open(temp_transcript_file.name, "w", encoding="utf-8") as f:
39
  counter = 1
@@ -52,8 +55,7 @@ def process_video(Video, target_language):
52
  f.write(f"{segment.text}\n\n")
53
  counter += 1
54
 
55
- # 3. Tradução
56
- flores_code = lang_codes.get(target_language, "eng_Latn") # Definindo flores_code
57
  temp_translated_file = tempfile.NamedTemporaryFile(delete=False, suffix=".srt")
58
  with open(temp_transcript_file.name, "r", encoding="utf-8") as infile, open(temp_translated_file.name, "w", encoding="utf-8") as outfile:
59
  for line in infile:
@@ -67,15 +69,20 @@ def process_video(Video, target_language):
67
  else:
68
  outfile.write("\n")
69
 
70
- # 5. Incorporar legenda
71
- output_video = "output_video.mp4" # Definindo output_video
72
- run(["ffmpeg", "-i", Video, "-vf", f"subtitles={temp_translated_file.name}", output_video])
 
 
 
 
 
 
73
  os.unlink(temp_transcript_file.name)
74
  os.unlink(temp_translated_file.name)
75
 
76
- return output_video # Retornando output_video
77
 
78
- # Interface Gradio
79
  iface = gr.Interface(
80
  fn=process_video,
81
  inputs=[
@@ -87,4 +94,4 @@ iface = gr.Interface(
87
  title="VIDEO TRANSCRIPTION AND TRANSLATION"
88
  )
89
 
90
- iface.launch()
 
1
  import gradio as gr
2
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
3
+ from subprocess import run, CalledProcessError
4
  from faster_whisper import WhisperModel
5
  import json
6
  import tempfile
7
+ import os
 
8
  from zipfile import ZipFile
9
  import stat
10
 
11
+ def run_command(command):
12
+ try:
13
+ run(command, check=True)
14
+ except CalledProcessError as e:
15
+ print(f"Command failed with error: {e}")
16
+ return False
17
+ return True
18
+
19
  ZipFile("ffmpeg.zip").extractall()
20
  st = os.stat('ffmpeg')
21
  os.chmod('ffmpeg', st.st_mode | stat.S_IEXEC)
22
+
23
  with open('language_codes.json', 'r') as f:
24
  lang_codes = json.load(f)
25
 
 
26
  tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
27
  model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
28
  whisper_model = WhisperModel("large-v2", device="cuda", compute_type="float16")
29
 
30
  def process_video(Video, target_language):
 
31
  audio_file = tempfile.NamedTemporaryFile(suffix=".wav").name
 
 
 
 
32
 
33
+ if not run_command(["ffmpeg", "-i", Video, audio_file]):
34
+ print("FFmpeg command failed. Exiting.")
35
+ return
36
+
37
+ segments, _ = whisper_model.transcribe(audio_file, beam_size=5)
38
  segments = list(segments)
39
 
 
40
  temp_transcript_file = tempfile.NamedTemporaryFile(delete=False, suffix=".srt")
41
  with open(temp_transcript_file.name, "w", encoding="utf-8") as f:
42
  counter = 1
 
55
  f.write(f"{segment.text}\n\n")
56
  counter += 1
57
 
58
+ flores_code = lang_codes.get(target_language, "eng_Latn")
 
59
  temp_translated_file = tempfile.NamedTemporaryFile(delete=False, suffix=".srt")
60
  with open(temp_transcript_file.name, "r", encoding="utf-8") as infile, open(temp_translated_file.name, "w", encoding="utf-8") as outfile:
61
  for line in infile:
 
69
  else:
70
  outfile.write("\n")
71
 
72
+ if not os.path.exists(temp_translated_file.name):
73
+ print("Subtitle file does not exist. Exiting.")
74
+ return
75
+
76
+ output_video = "output_video.mp4"
77
+ if not run_command(["ffmpeg", "-i", Video, "-vf", f"subtitles={temp_translated_file.name}", output_video]):
78
+ print("FFmpeg command for embedding subtitles failed. Exiting.")
79
+ return
80
+
81
  os.unlink(temp_transcript_file.name)
82
  os.unlink(temp_translated_file.name)
83
 
84
+ return output_video
85
 
 
86
  iface = gr.Interface(
87
  fn=process_video,
88
  inputs=[
 
94
  title="VIDEO TRANSCRIPTION AND TRANSLATION"
95
  )
96
 
97
+ iface.launch()