artificialguybr committed on
Commit
0bc447a
·
1 Parent(s): f4c59d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -13
app.py CHANGED
@@ -13,17 +13,19 @@ import subprocess
13
  import torch
14
  import bitsandbytes
15
  import scipy
 
16
 
17
  ZipFile("ffmpeg.zip").extractall()
18
  st = os.stat('ffmpeg')
19
  os.chmod('ffmpeg', st.st_mode | stat.S_IEXEC)
20
 
21
- with open('language_codes.json', 'r') as f:
22
- lang_codes = json.load(f)
23
 
 
24
 
25
- tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-3.3B")
26
- model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-3.3B")
27
  whisper_model = WhisperModel("large-v2", device="cuda", compute_type="float16")
28
 
29
  print("cwd", os.getcwd())
@@ -75,21 +77,16 @@ def process_video(Video, target_language):
75
  f.seek(0)
76
 
77
  # Translating the SRT from Whisper with NLLB.
78
- flores_code = lang_codes.get(target_language, "eng_Latn")
79
  paragraph = ""
80
  for line in f:
81
  if line.strip().isnumeric() or "-->" in line:
82
- if paragraph:
83
- inputs = tokenizer(paragraph, return_tensors="pt")
84
- translated_tokens = model.generate(**inputs, forced_bos_token_id=tokenizer.lang_code_to_id[flores_code], max_length=100)
85
- translated_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
86
- translated_lines.append(translated_text + "\n")
87
- paragraph = ""
88
  translated_lines.append(line)
89
  elif line.strip() != "":
90
- paragraph += " " + line.strip()
 
91
  else:
92
- translated_lines.append("\n")
93
 
94
  # Move the file pointer to the beginning of the file and truncate it.
95
  f.seek(0)
 
13
  import torch
14
  import bitsandbytes
15
  import scipy
16
+ from googletrans import Translator
17
 
18
  ZipFile("ffmpeg.zip").extractall()
19
  st = os.stat('ffmpeg')
20
  os.chmod('ffmpeg', st.st_mode | stat.S_IEXEC)
21
 
22
+ with open('google_lang_codes.json', 'r') as f:
23
+ google_lang_codes = json.load(f)
24
 
25
+ translator = Translator()
26
 
27
+ #tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-3.3B")
28
+ #model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-3.3B")
29
  whisper_model = WhisperModel("large-v2", device="cuda", compute_type="float16")
30
 
31
  print("cwd", os.getcwd())
 
77
  f.seek(0)
78
 
79
  # Translating the SRT from Whisper with Google Translate (googletrans).
80
+ target_language_code = google_lang_codes.get(target_language, "en")
81
  paragraph = ""
82
  for line in f:
83
  if line.strip().isnumeric() or "-->" in line:
 
 
 
 
 
 
84
  translated_lines.append(line)
85
  elif line.strip() != "":
86
+ translated_text = translator.translate(line.strip(), dest=target_language_code).text
87
+ translated_lines.append(translated_text + "\n")
88
  else:
89
+ translated_lines.append("\n")
90
 
91
  # Move the file pointer to the beginning of the file and truncate it.
92
  f.seek(0)