Spaces:

JunyiAcademy
/

vaitor2

Sleeping

App Files Files Community

youngtsai committed on Apr 26, 2024

Commit

2c1b15d

1 Parent(s): 6b24245

def get_video_duration(video_id):

Browse files

Files changed (1) hide show

app.py +14 -5

app.py CHANGED Viewed

@@ -415,7 +415,14 @@ def generate_transcription_by_whisper(video_id):
 def get_video_duration(video_id):
     yt = YouTube(f'https://www.youtube.com/watch?v={video_id}')
-    return yt.length
 def process_transcript_and_screenshots_on_gcs(video_id):
     print("====process_transcript_and_screenshots_on_gcs====")
@@ -428,7 +435,7 @@ def process_transcript_and_screenshots_on_gcs(video_id):
     # 检查逐字稿是否存在
     is_new_transcript = False
     is_transcript_exists = GCS_SERVICE.check_file_exists(bucket_name, transcript_blob_name)
-    video_length = get_video_duration(video_id)
     if not is_transcript_exists:
         print("逐字稿文件不存在于GCS中，重新建立")
         # 从YouTube获取逐字稿并上传
@@ -444,8 +451,9 @@ def process_transcript_and_screenshots_on_gcs(video_id):
         else:
             print("沒有找到字幕")
             transcript = generate_transcription_by_whisper(video_id)
-        transcript = [entry for entry in transcript if entry['start'] <= video_length]
         transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
         GCS_SERVICE.upload_json_string(bucket_name, transcript_blob_name, transcript_text)
@@ -455,7 +463,8 @@ def process_transcript_and_screenshots_on_gcs(video_id):
         print("逐字稿已存在于GCS中")
         transcript_text = GCS_SERVICE.download_as_string(bucket_name, transcript_blob_name)
         transcript = json.loads(transcript_text)
-        transcript = [entry for entry in transcript if entry['start'] <= video_length]
     # print("===確認其他衍生文件===")
     # source = "gcs"

 def get_video_duration(video_id):
     """Return the length of a YouTube video, or None if it cannot be read.

     Args:
         video_id: YouTube video ID; it is interpolated into the watch URL.

     Returns:
         The value of the YouTube object's ``length`` attribute (presumably
         seconds, since callers compare it with transcript ``start`` values --
         TODO confirm), or ``None`` when reading the attribute raises.
         Callers treat a falsy result as "skip duration-based filtering".
     """
     yt = YouTube(f'https://www.youtube.com/watch?v={video_id}')
     try:
         video_duration = yt.length
     except Exception as error:
         # Best-effort lookup: a failure here must not abort transcript
         # processing, but KeyboardInterrupt/SystemExit should still
         # propagate, so only Exception subclasses are caught.
         print(f"get_video_duration failed for {video_id}: {error}")
         video_duration = None
     print(f"video_duration: {video_duration}")
     return video_duration
 def process_transcript_and_screenshots_on_gcs(video_id):
     print("====process_transcript_and_screenshots_on_gcs====")
     # 检查逐字稿是否存在
     is_new_transcript = False
     is_transcript_exists = GCS_SERVICE.check_file_exists(bucket_name, transcript_blob_name)
+    video_duration = get_video_duration(video_id)
     if not is_transcript_exists:
         print("逐字稿文件不存在于GCS中，重新建立")
         # 从YouTube获取逐字稿并上传
         else:
             print("沒有找到字幕")
             transcript = generate_transcription_by_whisper(video_id)
+        if video_duration:
+            transcript = [entry for entry in transcript if entry['start'] <= video_duration]
         transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
         GCS_SERVICE.upload_json_string(bucket_name, transcript_blob_name, transcript_text)
         print("逐字稿已存在于GCS中")
         transcript_text = GCS_SERVICE.download_as_string(bucket_name, transcript_blob_name)
         transcript = json.loads(transcript_text)
+        if video_duration:
+            transcript = [entry for entry in transcript if entry['start'] <= video_duration]
     # print("===確認其他衍生文件===")
     # source = "gcs"