Spaces:

StarPigeon
/

ViDove

Sleeping

App Files Files Community

JiaenLiu committed on Nov 12, 2023

Commit

04ef04e

1 Parent(s): 00ca03a

module 1 and module 4

Browse files

Former-commit-id: 5053510b5fc3e67be394af64af26b347de3739a1

Files changed (1) hide show

src/task.py +72 -3

src/task.py CHANGED Viewed

@@ -10,8 +10,10 @@ import logging
 import subprocess
 from src.srt_util.srt import SrtScript
 from src.srt_util.srt2ass import srt2ass
 """
 Youtube link
@@ -77,6 +79,8 @@ class Task:
         self.progress = NotImplemented
         self.SRT_Script = None
         self.result = None
     @staticmethod
@@ -100,7 +104,40 @@ class Task:
     def get_srt_class(self, whisper_model='tiny', method="stable"):
         # Instead of using the script_en variable directly, we'll use script_input
         self.status = TaskStatus.INITIALIZING_ASR
-        self.SRT_Script = SrtScript
         time.sleep(5)
         pass
@@ -129,11 +166,43 @@ class Task:
     # Module 3: perform srt translation
     def translation(self):
         time.sleep(5)
         pass
     # Module 4: perform srt post process steps
-    def postprocess(self):
         self.status = TaskStatus.POST_PROCESSING
         time.sleep(5)
         pass

 import subprocess
 from src.srt_util.srt import SrtScript
 from src.srt_util.srt2ass import srt2ass
+from time import time, strftime, gmtime, sleep
+import torch
+import stable_whisper
 """
 Youtube link
         self.progress = NotImplemented
         self.SRT_Script = None
         self.result = None
+        self.s_t = None
+        self.t_e = None
     @staticmethod
     def get_srt_class(self, whisper_model='tiny', method="stable"):
         """Transcribe the task audio and store the result as ``self.SRT_Script``.

         Sets ``self.status`` to ``TaskStatus.INITIALIZING_ASR``, records the
         start time in ``self.t_s``, runs speech recognition on the task's
         audio, wraps the transcript in ``SrtScript`` and writes it to
         ``task_<task_id>_en.srt`` inside ``self.task_local_dir``.

         Args:
             whisper_model: model name passed to ``stable_whisper.load_model``
                 (only used when ``method == "stable"``).
             method: ``"api"`` to call ``openai.Audio.transcribe`` or
                 ``"stable"`` to run ``stable_whisper`` locally.

         Raises:
             ValueError: if ``method`` is neither ``"api"`` nor ``"stable"``.
             RuntimeError: if the English SRT file already exists; reusing a
                 cached transcript is not implemented in this method.
         """
         self.status = TaskStatus.INITIALIZING_ASR
         self.t_s = time()

         # The file name previously contained a stray ")" ("task_<id>)_en.srt").
         en_srt_path = self.task_local_dir.joinpath(f"task_{self.task_id}_en.srt")

         if en_srt_path.exists():
             # Previously this path fell through with `transcript` unbound and
             # crashed with UnboundLocalError. Fail with an explicit message.
             # TODO: load the existing file once the SrtScript loader to use
             # for that is confirmed.
             raise RuntimeError(
                 f"{en_srt_path} already exists; loading a cached transcript is not implemented")

         if method not in ("api", "stable"):
             # Reject unknown methods up front instead of failing later on an
             # unbound `transcript`.
             raise ValueError(f"unsupported ASR method: {method!r}")

         # extract script from audio
         logging.info("extract script from audio")
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         # logging needs a %-placeholder for every extra argument.
         logging.info("device: %s", device)
         audio_path = self.task_local_dir.joinpath(f"task_{self.task_id}.mp3")

         if method == "api":
             with open(audio_path, 'rb') as audio_file:
                 transcript = openai.Audio.transcribe(model="whisper-1", file=audio_file, response_format="srt")
         else:
             model = stable_whisper.load_model(whisper_model, device)
             # NOTE(review): this branch reads self.audio_path while the "api"
             # branch reads the local audio_path built above - confirm both
             # point at the same file.
             transcript = model.transcribe(str(self.audio_path), regroup=False,
                                           initial_prompt="Hello, welcome to my lecture. Are you good my friend?")
             # The return value of the chain is discarded; presumably these
             # regrouping calls modify `transcript` in place - TODO confirm.
             (
                 transcript
                 .split_by_punctuation(['.', '。', '?'])
                 .merge_by_gap(.15, max_words=3)
                 .merge_by_punctuation([' '])
                 .split_by_punctuation(['.', '。', '?'])
             )
             transcript = transcript.to_dict()

         # after getting the transcript, release the gpu resource
         torch.cuda.empty_cache()

         self.SRT_Script = SrtScript(transcript)
         # save the srt script to local
         self.SRT_Script.write_srt_file_src(en_srt_path)

         # Fixed 5-second delay kept from the original body. `time` is bound to
         # the time() function by the module's `from time import time, ...`,
         # so `time.sleep` would raise AttributeError; call `sleep` directly.
         sleep(5)
     # Module 3: perform srt translation
     def translation(self):
         """Placeholder for the translation step; currently only waits 5 seconds."""
         # `time` is bound to the time() function by the module's
         # `from time import time, strftime, gmtime, sleep`, so `time.sleep`
         # would raise AttributeError; call the imported `sleep` directly.
         sleep(5)
     # Module 4: perform srt post process steps
+    def postprocess(self, encode=False, srt_only=False):
         self.status = TaskStatus.POST_PROCESSING
+        logging.info("---------------------Start Post-processing SRT class---------------------")
+        self.SRT_Script.check_len_and_split()
+        self.SRT_Script.remove_trans_punctuation()
+        base_path = Path(self.dir_result).joinpath(self.video_name).joinpath(self.video_name)
+        self.SRT_Script.write_srt_file_translate(f"{base_path}_zh.srt")
+        self.SRT_Script.write_srt_file_bilingual(f"{base_path}_bi.srt")
+        logging.info("write Chinese .srt file to .ass")
+        assSub_zh = srt2ass(f"{base_path}_zh.srt", "default", "No", "Modest")
+        logging.info('ASS subtitle saved as: ' + assSub_zh)
+        # encode to .mp4 video file
+        if encode:
+            logging.info("encoding video file")
+            if srt_only:
+                subprocess.run(
+                    f'ffmpeg -i {self.video_path} -vf "subtitles={base_path}_zh.srt" {base_path}.mp4')
+            else:
+                subprocess.run(
+                    f'ffmpeg -i {self.video_path} -vf "subtitles={base_path}_zh.ass" {base_path}.mp4')
+        self.t_e = time()
+        logging.info(
+            "Pipeline finished, time duration:{}".format(strftime("%H:%M:%S", gmtime(self.t_e - self.t_s))))
         time.sleep(5)
         pass