Spaces:
Sleeping
Sleeping
video_length = get_video_duration(video_id)
Browse files
app.py
CHANGED
|
@@ -369,6 +369,10 @@ def generate_transcription_by_whisper(video_id):
|
|
| 369 |
|
| 370 |
return transcription
|
| 371 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 372 |
def process_transcript_and_screenshots_on_gcs(video_id):
|
| 373 |
print("====process_transcript_and_screenshots_on_gcs====")
|
| 374 |
# GCS
|
|
@@ -396,6 +400,11 @@ def process_transcript_and_screenshots_on_gcs(video_id):
|
|
| 396 |
print("沒有找到字幕")
|
| 397 |
transcript = generate_transcription_by_whisper(video_id)
|
| 398 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 399 |
transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
|
| 400 |
GCS_SERVICE.upload_json_string(bucket_name, transcript_blob_name, transcript_text)
|
| 401 |
|
|
|
|
| 369 |
|
| 370 |
return transcription
|
| 371 |
|
| 372 |
+
def get_video_duration(video_id):
    """Return the ``length`` attribute of the ``YouTube`` object for a video.

    The watch URL is built from ``video_id`` and handed to ``YouTube``.
    Presumably ``length`` is the video duration in seconds — confirm
    against the library version in use.
    """
    watch_url = f'https://www.youtube.com/watch?v={video_id}'
    return YouTube(watch_url).length
|
| 375 |
+
|
| 376 |
def process_transcript_and_screenshots_on_gcs(video_id):
|
| 377 |
print("====process_transcript_and_screenshots_on_gcs====")
|
| 378 |
# GCS
|
|
|
|
| 400 |
print("沒有找到字幕")
|
| 401 |
transcript = generate_transcription_by_whisper(video_id)
|
| 402 |
|
| 403 |
+
video_length = get_video_duration(video_id)
|
| 404 |
+
# Drop every entry whose 'start' exceeds video_length.
# The list is rebuilt in a single pass instead of calling
# transcript.remove() inside a loop over transcript: removing while
# iterating shifts the remaining items left, so the element right after
# each removed one was never examined and consecutive out-of-range
# entries survived. Slice assignment keeps the mutation in place, so any
# other reference to the same list object sees the filtered result.
transcript[:] = [
    entry for entry in transcript
    if entry['start'] <= video_length
]
|
| 407 |
+
|
| 408 |
transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
|
| 409 |
GCS_SERVICE.upload_json_string(bucket_name, transcript_blob_name, transcript_text)
|
| 410 |
|