debaghtk committed on
Commit
e583e7e
·
1 Parent(s): 28d8414

do not download transcripts for already processed videos

Browse files
Files changed (1) hide show
  1. youtubeTranscription.py +8 -1
youtubeTranscription.py CHANGED
@@ -64,6 +64,13 @@ def fetch_video_data(video_ids):
64
  pbar = tqdm(total=len(video_ids), desc="Processing videos", unit="video")
65
 
66
  for video_id in video_ids:
 
 
 
 
 
 
 
67
  try:
68
  # Fetch video details
69
  video_response = youtube.videos().list(
@@ -185,4 +192,4 @@ if __name__ == "__main__":
185
  video_ids = get_all_video_ids(CHANNEL_ID)
186
 
187
  # Step 2: Fetch transcripts for the videos
188
- video_data = fetch_video_data(video_ids)
 
64
  pbar = tqdm(total=len(video_ids), desc="Processing videos", unit="video")
65
 
66
  for video_id in video_ids:
67
+ # Check if transcription file already exists
68
+ transcription_file = os.path.join('transcriptions', f"{video_id}_transcription.txt")
69
+ if os.path.exists(transcription_file):
70
+ pbar.write(f"Skipping Video ID: {video_id} (transcription already exists)")
71
+ pbar.update(1)
72
+ continue # Skip to the next video
73
+
74
  try:
75
  # Fetch video details
76
  video_response = youtube.videos().list(
 
192
  video_ids = get_all_video_ids(CHANNEL_ID)
193
 
194
  # Step 2: Fetch transcripts for the videos
195
+ video_data = fetch_video_data(video_ids)