Do not download transcripts for already-processed videos
Browse files
- youtubeTranscription.py +8 -1
youtubeTranscription.py
CHANGED
|
@@ -64,6 +64,13 @@ def fetch_video_data(video_ids):
|
|
| 64 |
pbar = tqdm(total=len(video_ids), desc="Processing videos", unit="video")
|
| 65 |
|
| 66 |
for video_id in video_ids:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
try:
|
| 68 |
# Fetch video details
|
| 69 |
video_response = youtube.videos().list(
|
|
@@ -185,4 +192,4 @@ if __name__ == "__main__":
|
|
| 185 |
video_ids = get_all_video_ids(CHANNEL_ID)
|
| 186 |
|
| 187 |
# Step 2: Fetch transcripts for the videos
|
| 188 |
-
video_data = fetch_video_data(video_ids)
|
|
|
|
| 64 |
pbar = tqdm(total=len(video_ids), desc="Processing videos", unit="video")
|
| 65 |
|
| 66 |
for video_id in video_ids:
|
| 67 |
+
# Check if transcription file already exists
|
| 68 |
+
transcription_file = os.path.join('transcriptions', f"{video_id}_transcription.txt")
|
| 69 |
+
if os.path.exists(transcription_file):
|
| 70 |
+
pbar.write(f"Skipping Video ID: {video_id} (transcription already exists)")
|
| 71 |
+
pbar.update(1)
|
| 72 |
+
continue # Skip to the next video
|
| 73 |
+
|
| 74 |
try:
|
| 75 |
# Fetch video details
|
| 76 |
video_response = youtube.videos().list(
|
|
|
|
| 192 |
video_ids = get_all_video_ids(CHANNEL_ID)
|
| 193 |
|
| 194 |
# Step 2: Fetch transcripts for the videos
|
| 195 |
+
video_data = fetch_video_data(video_ids)
|