# K00B404's picture
# Update app.py
# bfd1883 verified
from youtube_transcript_api import YouTubeTranscriptApi
from googleapiclient.discovery import build
from docx import Document
import re
import os
def get_youtube_transcript(video_id):
    """Fetch the transcript of a YouTube video on a best-effort basis.

    Args:
        video_id: Identifier passed straight to
            ``YouTubeTranscriptApi.get_transcript``.

    Returns:
        Whatever ``YouTubeTranscriptApi.get_transcript`` returns, or ``None``
        when the call raises any ``Exception`` (the error is printed, not
        re-raised).
    """
    try:
        fetched = YouTubeTranscriptApi.get_transcript(video_id)
    except Exception as err:
        # Deliberately broad: any failure is reported and turned into None
        # so the caller can simply test the result for truthiness.
        print(f"An error occurred: {err}")
        return None
    else:
        return fetched
def get_video_title(api_key, video_id):
    """Look up a video's title through the YouTube Data API (v3).

    Args:
        api_key: Developer key handed to ``build`` as ``developerKey``.
        video_id: Video identifier used as the ``id`` filter of the
            ``videos().list`` request.

    Returns:
        The ``snippet.title`` of the first returned item, or ``None`` when
        the response has no ``items`` key or an empty ``items`` list.
    """
    client = build('youtube', 'v3', developerKey=api_key)
    response = client.videos().list(part='snippet', id=video_id).execute()

    # Guard clause: nothing to report when the API returned no matching video.
    if 'items' not in response or len(response['items']) <= 0:
        return None

    first_item = response['items'][0]
    return first_item['snippet']['title']
def save_transcript_to_doc(transcript, filename):
    """Write transcript entries into a new document and save it.

    The document gets a level-0 heading followed by one paragraph per entry,
    formatted as ``"<start> - <duration>: <text>"`` with both numbers shown
    to two decimals.

    NOTE(review): the second number is the entry's duration, not its end
    time, even though the layout reads like a time range -- confirm that
    this is what is wanted.

    Args:
        transcript: Iterable of mappings, each providing the keys
            ``'start'``, ``'duration'`` and ``'text'``.
        filename: Destination passed to ``Document.save``.
    """
    document = Document()
    document.add_heading('YouTube Video Transcript', 0)

    for item in transcript:
        document.add_paragraph(
            f"{item['start']:.2f} - {item['duration']:.2f}: {item['text']}"
        )

    document.save(filename)
def sanitize_filename(filename):
    """Return *filename* with each path-unsafe character replaced by ``_``.

    The characters replaced are: backslash, ``/``, ``*``, ``?``, ``:``,
    ``"``, ``<``, ``>`` and ``|``. Every occurrence is replaced
    individually, so runs of such characters yield runs of underscores.
    """
    unsafe_chars = re.compile(r'[\\/*?:"<>|]')
    return unsafe_chars.sub("_", filename)
if __name__ == "__main__":
    # Script entry point: fetch the transcript and the title of one
    # hard-coded video and print the transcript.

    # The argument is the NAME of the environment variable that holds the
    # YouTube Data API key, not the key itself.
    api_key = os.getenv('YOUR_YOUTUBE_DATA_API_KEY')
    video_id = 'nTQIYWgn-lQ'  # Replace this with your video ID

    # os.getenv returns None when the variable is unset; stop with a clear
    # message instead of handing a missing key to the API client.
    if not api_key:
        raise SystemExit(
            "Set the YOUR_YOUTUBE_DATA_API_KEY environment variable to your "
            "YouTube Data API key."
        )

    transcript = get_youtube_transcript(video_id)
    # The title is only used when a transcript was obtained, so skip the
    # API request otherwise.
    video_title = get_video_title(api_key, video_id) if transcript else None

    if transcript and video_title:
        sanitized_title = sanitize_filename(video_title)
        # save_transcript_to_doc writes through docx.Document, so the file
        # name carries a .docx extension.
        output_filename = f"./{sanitized_title}.docx"
        # Saving is currently disabled; uncomment to write the document.
        # save_transcript_to_doc(transcript, output_filename)
        print(f"Transcript text is {transcript}.")
    else:
        print("Transcript or video title not available.")