Spaces:

debaghtk
/

ryangpt

Runtime error

debaghtk committed on Oct 15, 2024

Commit

e583e7e

1 Parent(s): 28d8414

do not download transcripts for already processed videos

Files changed (1) hide show

youtubeTranscription.py CHANGED Viewed

@@ -64,6 +64,13 @@ def fetch_video_data(video_ids):
     pbar = tqdm(total=len(video_ids), desc="Processing videos", unit="video")
     for video_id in video_ids:
         try:
             # Fetch video details
             video_response = youtube.videos().list(
@@ -185,4 +192,4 @@ if __name__ == "__main__":
     video_ids = get_all_video_ids(CHANNEL_ID)
     # Step 2: Fetch transcripts for the videos
-    video_data = fetch_video_data(video_ids)

     pbar = tqdm(total=len(video_ids), desc="Processing videos", unit="video")
     for video_id in video_ids:
+        # Check if transcription file already exists
+        transcription_file = os.path.join('transcriptions', f"{video_id}_transcription.txt")
+        if os.path.exists(transcription_file):
+            pbar.write(f"Skipping Video ID: {video_id} (transcription already exists)")
+            pbar.update(1)
+            continue  # Skip to the next video
         try:
             # Fetch video details
             video_response = youtube.videos().list(
     video_ids = get_all_video_ids(CHANNEL_ID)
     # Step 2: Fetch transcripts for the videos
+    video_data = fetch_video_data(video_ids)