Spaces:
Runtime error
Runtime error
| import time | |
| import gradio as gr | |
| from selenium.webdriver.common.by import By | |
| import undetected_chromedriver as uc | |
def _extract_caption_tracks(page_source):
    """Return the ``captionTracks`` snippet embedded in *page_source*.

    The snippet starts at the first occurrence of the text ``captionTracks``
    and runs through the ``]`` that closes the first ``[`` following it.
    Brackets are balanced, and brackets that appear inside double-quoted
    strings are ignored, so nested arrays do not truncate the result.

    Returns ``None`` when the marker is absent or no array follows it.
    """
    start = page_source.find("captionTracks")
    if start == -1:
        return None
    open_idx = page_source.find("[", start)
    if open_idx == -1:
        return None

    depth = 0
    in_string = False
    escaped = False
    for idx in range(open_idx, len(page_source)):
        ch = page_source[idx]
        if escaped:
            # Previous character was a backslash: this one is a literal.
            escaped = False
        elif ch == "\\":
            escaped = True
        elif ch == '"':
            in_string = not in_string
        elif in_string:
            continue
        elif ch == "[":
            depth += 1
        elif ch == "]":
            depth -= 1
            if depth == 0:
                return page_source[start:idx + 1]

    # Brackets never balanced (e.g. truncated page): fall back to the first
    # closing bracket, if any, rather than returning nothing useful.
    first_close = page_source.find("]", open_idx)
    if first_close == -1:
        return None
    return page_source[start:first_close + 1]


def get_captions_selenium(video_url):
    """Load *video_url* in headless Chrome and look for caption metadata.

    Args:
        video_url: URL of the video page to open.

    Returns:
        A human-readable status string: the raw ``captionTracks`` snippet
        taken from the page source when one is found, a warning when it is
        not, or an error message when the URL is blank or the browser
        session fails. Exceptions are reported in the returned string
        instead of being raised.
    """
    if not video_url or not video_url.strip():
        # Avoid starting a browser just to fail on an empty URL.
        return "β Error: No video URL provided."

    driver = None
    try:
        print("π Launching Chrome...")
        options = uc.ChromeOptions()
        options.add_argument("--headless=new")  # Use 'new' headless mode for Chrome 109+
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        driver = uc.Chrome(options=options)
        print("π Navigating to video...")
        driver.get(video_url)
        print("β Waiting for page to load...")
        time.sleep(5)
        print("π Scraping page source...")
        caption_json = _extract_caption_tracks(driver.page_source)
    except Exception as e:
        print(f"β Exception occurred: {e}")
        return f"β Error: {str(e)}"
    finally:
        # Always shut the browser down, including when navigation or
        # scraping raised; otherwise the Chrome process is left running.
        if driver is not None:
            try:
                driver.quit()
            except Exception as quit_error:
                print(f"Failed to close Chrome cleanly: {quit_error}")

    if caption_json is None:
        return "β οΈ Captions info not found in source. May not be available or blocked."
    return "β Found potential captions info in page source (you may need to parse this JSON).\n\n" + caption_json
# Gradio interface: one textbox for the video URL, plain-text result out.
# The interface is built and launched as soon as this module is executed.
url_input = gr.Textbox(label="YouTube Video URL")
demo = gr.Interface(
    fn=get_captions_selenium,
    inputs=[url_input],
    outputs="text",
    title="YouTube Captions Scraper (Selenium)",
    description="Uses Selenium with undetected-chromedriver to extract captions from a YouTube video.",
)
demo.launch()