Spaces:

Rivalcoder
/

Youtube_Dockor

Runtime error

Youtube_Dockor / app.py

Rivalcoder

Add

39ddda1 8 months ago

2.07 kB

	import os
	import time
	import gradio as gr
	from selenium.webdriver.common.by import By
	import undetected_chromedriver as uc

	# Function to extract YouTube captions using a headless browser
	def get_captions_selenium(video_url):
	    """Load a YouTube page in headless Chromium and return its raw caption-track snippet.

	    The page is opened with undetected-chromedriver, given a fixed five
	    seconds to load, and its HTML source is searched for the literal marker
	    ``captionTracks``. The text from that marker up to and including the next
	    ``]`` is returned for display; it is not parsed here.

	    Args:
	        video_url: URL of the video page to load. Surrounding whitespace is
	            ignored; an empty or missing value is rejected without starting
	            a browser.

	    Returns:
	        A human-readable string: the snippet prefixed with a success banner,
	        a warning when no complete snippet is present in the page source, or
	        an ``"❌ Error: ..."`` message when anything fails. Exceptions are
	        reported in the returned text rather than raised.
	    """
	    if not video_url or not video_url.strip():
	        # Nothing to load, so skip the expensive browser launch entirely.
	        return "❌ Error: No video URL provided."

	    driver = None
	    try:
	        print("🚀 Launching Chromium via undetected-chromedriver...")
	        options = uc.ChromeOptions()
	        # Point to the system-installed Chromium binary (overridable via env).
	        options.binary_location = os.environ.get("CHROME_BINARY", "/usr/bin/chromium")
	        options.add_argument("--headless=new")
	        options.add_argument("--no-sandbox")
	        options.add_argument("--disable-dev-shm-usage")

	        driver = uc.Chrome(options=options)
	        print("🌍 Navigating to video URL...")
	        driver.get(video_url.strip())

	        print("⌛ Waiting for page to load...")
	        time.sleep(5)

	        print("📄 Scraping page source...")
	        page_source = driver.page_source

	        start = page_source.find("captionTracks")
	        # Only look for the terminator when the marker exists; a missing "]"
	        # would otherwise yield an empty slice reported as a success.
	        closing = page_source.find("]", start) if start != -1 else -1
	        if closing == -1:
	            return "⚠️ Captions info not found in source. May not be available or blocked."

	        # NOTE(review): the slice stops at the first "]" after the marker, so
	        # a track entry that itself contains an array would be cut short.
	        caption_json = page_source[start:closing + 1]
	        return (
	            "✅ Found potential captions info.\n"
	            "(You can parse this JSON string to extract subtitles.)\n\n"
	            + caption_json
	        )

	    except Exception as e:
	        print(f"❌ Exception occurred: {e}")
	        return f"❌ Error: {str(e)}"

	    finally:
	        # Always shut the browser down, including on failures after launch,
	        # so a failed request does not leave a Chromium process behind.
	        if driver is not None:
	            try:
	                driver.quit()
	            except Exception as quit_error:
	                # A shutdown failure should not discard an already-built result.
	                print(f"⚠️ Failed to close browser cleanly: {quit_error}")

	# Gradio interface definition
	# Video pre-filled in the URL box.
	default_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"

	# Single text input carrying the video URL passed to the scraper.
	url_box = gr.Textbox(value=default_url, label="YouTube Video URL")

	# Blurb passed to the interface as its description.
	app_description = (
	    "Extract captions from a YouTube video using a headless browser with "
	    "undetected-chromedriver. Logs will appear in the Space's console."
	)

	# Wire the scraper function to a text-in / text-out UI and start serving.
	demo = gr.Interface(
	    fn=get_captions_selenium,
	    inputs=[url_box],
	    outputs="text",
	    title="YouTube Captions Scraper (Selenium)",
	    description=app_description,
	)
	demo.launch()