# Spaces: Runtime error
# importing the necessary modules
import os
import re
import time
import urllib.parse
import urllib.request

import gradio as gr
# Creating the Gradio app: one text box takes a channel URL and the download
# log is shown in the output text box.

YOUTUBE_BASE_URL = "https://www.youtube.com"
WATCH_PREFIX = "/watch?v="

# Characters accepted inside the query string of a scraped link.  "=" and "&"
# must be included, otherwise the match stops right after the first parameter
# name and the resulting URL is useless.  Page sources may also carry "&"
# escaped as "&amp;" (HTML) or as the six characters \u0026 (embedded JSON).
_QUERY = r"(?:[A-Za-z0-9_.~%=&;,+-]|\\u0026)*"
_VIDEO_LINK_RE = re.compile(r"/watch\?v=[A-Za-z0-9_.-]+")
_EDITOR_LINK_RE = re.compile(r"/timedtext_editor\?" + _QUERY)
_DOWNLOAD_LINK_RE = re.compile(r"/api/timedtext\?" + _QUERY)


def _fetch_text(url, timeout=30):
    """Return the body of *url* decoded as UTF-8, closing the connection."""
    with urllib.request.urlopen(url, timeout=timeout) as response:
        return response.read().decode("utf-8", errors="replace")


def _is_http_url(url):
    """Return True when *url* parses and uses the http or https scheme."""
    try:
        scheme = urllib.parse.urlsplit(url).scheme
    except ValueError:
        return False
    return scheme.lower() in ("http", "https")


def _first_link(pattern, page_source):
    """Return the first *pattern* match in *page_source* with "&" unescaped."""
    match = pattern.search(page_source)
    if match is None:
        return None
    return match.group(0).replace("\\u0026", "&").replace("&amp;", "&")


def _locate_transcript(video_path, fetch):
    """Follow video page -> transcript page and return the download URL.

    Returns None when either page carries no transcript link.
    NOTE(review): whether YouTube still serves the /timedtext_editor and
    /api/timedtext paths cannot be confirmed from this file.
    """
    video_page = fetch(YOUTUBE_BASE_URL + video_path)
    editor_path = _first_link(_EDITOR_LINK_RE, video_page)
    if editor_path is None:
        return None
    editor_page = fetch(YOUTUBE_BASE_URL + editor_path)
    download_path = _first_link(_DOWNLOAD_LINK_RE, editor_page)
    if download_path is None:
        return None
    return YOUTUBE_BASE_URL + download_path


def transcript_extract(channel_url, *, delay=3.0, output_dir=".",
                       fetch=None, retrieve=None):
    """Download the transcript of every video linked from a channel page.

    Args:
        channel_url: Address of the channel page, as typed by the user.
        delay: Seconds to wait after each successful download.
        output_dir: Directory that receives the ``<video id>.xml`` files.
        fetch: Callable ``fetch(url) -> str`` used to read pages; defaults to
            an ``urllib`` based reader.
        retrieve: Callable ``retrieve(url, path)`` used to save a transcript;
            defaults to ``urllib.request.urlretrieve``.

    Returns:
        A newline-separated log with one line per video, or a single error
        line when the channel page could not be used.
    """
    fetch = _fetch_text if fetch is None else fetch
    retrieve = urllib.request.urlretrieve if retrieve is None else retrieve

    channel_url = (channel_url or "").strip()
    # The URL comes from an untrusted text box: refuse file:// and friends so
    # the app cannot be used to read local files.
    if not _is_http_url(channel_url):
        return "Please enter a channel URL starting with http:// or https://"

    try:
        channel_page = fetch(channel_url)
    except (OSError, ValueError) as error:
        return f"Could not open {channel_url}: {error}"

    # The same video is usually linked several times; keep first-seen order.
    video_paths = list(dict.fromkeys(_VIDEO_LINK_RE.findall(channel_page)))
    if not video_paths:
        return "No video links found on " + channel_url

    os.makedirs(output_dir, exist_ok=True)
    log = []
    for video_path in video_paths:
        video_id = video_path[len(WATCH_PREFIX):]
        downloaded = False
        try:
            download_url = _locate_transcript(video_path, fetch)
            if download_url is None:
                message = "Transcript not available for video " + video_id
            else:
                file_name = os.path.join(output_dir, video_id + ".xml")
                retrieve(download_url, file_name)
                downloaded = True
                message = "Downloading transcript for video " + video_id + "..."
        except (OSError, ValueError) as error:
            # One unreachable video must not abort the remaining downloads.
            message = f"Failed to fetch transcript for video {video_id}: {error}"
        print(message)
        log.append(message)
        if downloaded and delay > 0:
            # Pause between downloads to avoid hammering the server.
            time.sleep(delay)
    return "\n".join(log)


if __name__ == "__main__":
    # launch the gradio app; "share" is an option of launch(), not Interface()
    gr.Interface(
        fn=transcript_extract,
        inputs=gr.Textbox(label="Channel URL"),
        outputs="textbox",
    ).launch(share=True)