Spaces:

Darsh1234Tayal
/

Youtube_summarizer_-_basic

Sleeping

App Files Files Community

Youtube_summarizer_-_basic / app.py

Darsh1234Tayal

Update app.py

37d4541 verified 4 months ago

raw

history blame contribute delete

15 kB

	from langchain_text_splitters import RecursiveCharacterTextSplitter
	from langchain_chroma import Chroma
	from langchain_huggingface import HuggingFaceEmbeddings
	from bytez import Bytez
	from youtube_transcript_api import YouTubeTranscriptApi
	import gradio as gr
	from dotenv import load_dotenv
	import os
	from urllib.parse import urlparse, parse_qs
	import time

	# The Bytez API key is read from the environment; os.environ.get returns
	# None when BYTEZ_API_KEY is not set.
	# NOTE(review): load_dotenv is imported above but never called in the visible
	# source, so a local .env file is not loaded - confirm this is intended.
	api_key = os.environ.get("BYTEZ_API_KEY")
	# Module-level Bytez client shared by generate_summary() and execute().
	sdk = Bytez(api_key)

	def video_id_extractor(link):
	    """Return the YouTube video ID contained in *link*, or None.

	    Supported shapes (scheme optional, surrounding whitespace ignored):
	      * youtube.com/watch?v=<id>   (any youtube.com subdomain)
	      * youtube.com/shorts/<id>, /embed/<id>, /live/<id>, /v/<id>
	      * youtu.be/<id>

	    Args:
	        link: URL text entered by the user.

	    Returns:
	        The video ID string, or None when the link is not a recognisable
	        YouTube URL (callers test the result with ``if not video_id``).
	    """
	    if not isinstance(link, str):
	        return None
	    link = link.strip()
	    if not link:
	        return None

	    # urlparse only fills in the host when a scheme is present, so a pasted
	    # "youtube.com/watch?v=..." would otherwise be treated as a bare path.
	    if "://" not in link:
	        link = "https://" + link.lstrip("/")

	    try:
	        parsed_url = urlparse(link)
	    except ValueError:
	        # urlparse rejects some malformed hosts (e.g. broken IPv6 brackets).
	        return None

	    host = (parsed_url.hostname or "").lower()
	    segments = [part for part in parsed_url.path.split("/") if part]

	    if host == "youtu.be":
	        # Short links carry the ID as the first path segment.
	        return segments[0] if segments else None

	    if host == "youtube.com" or host.endswith(".youtube.com"):
	        video_id = parse_qs(parsed_url.query).get("v", [None])[0]
	        if video_id:
	            return video_id
	        # Path-based forms: /shorts/<id>, /embed/<id>, /live/<id>, /v/<id>
	        if len(segments) >= 2 and segments[0] in ("shorts", "embed", "live", "v"):
	            return segments[1]

	    return None

	def generate_transcript(video_id):
	    """Fetch the transcript of a YouTube video as one space-joined string.

	    Args:
	        video_id: YouTube video ID to fetch the transcript for.

	    Returns:
	        The snippet texts joined by single spaces, or None when fetching
	        fails for any reason (the failure is printed to stdout).
	    """
	    from youtube_transcript_api import YouTubeTranscriptApi, _errors
	    import traceback

	    print(f"[INFO] Fetching transcript for video ID: {video_id}")
	    try:
	        fetched = YouTubeTranscriptApi().fetch(video_id=video_id)
	        text = " ".join(snippet.text for snippet in fetched.snippets)
	        print(f"[INFO] Transcript fetched. Length: {len(text)} chars")
	        return text
	    except _errors.TranscriptsDisabled:
	        print(f"[ERROR] Transcripts are disabled for video {video_id}")
	    except _errors.VideoUnavailable:
	        print(f"[ERROR] Video unavailable or restricted: {video_id}")
	    except _errors.NoTranscriptFound:
	        print(f"[ERROR] No transcript found (no captions in English) for {video_id}")
	    except Exception as exc:
	        # Anything else is unexpected: keep the full traceback in the logs.
	        print(f"[ERROR] Unexpected exception fetching transcript: {exc}")
	        traceback.print_exc()
	    # Every handled failure falls through to here.
	    return None

	def create_and_save_vs(trans):
	    """Chunk the transcript, embed the chunks and index them in Chroma.

	    Args:
	        trans: Transcript text to index.

	    Returns:
	        The Chroma vector store built from the chunks, or None when any
	        step fails (the failure is printed with its traceback).
	    """
	    import traceback
	    import uuid

	    try:
	        splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=50)
	        docs = splitter.split_text(trans)
	        embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2')
	        # Give every video its own collection. Without an explicit name,
	        # from_texts uses a fixed default collection, so chunks from earlier
	        # videos indexed in the same process could be returned for later ones.
	        vector_store_db = Chroma.from_texts(
	            docs,
	            embeddings,
	            collection_name=f"yt-{uuid.uuid4().hex}",
	        )
	    except Exception as e:
	        # Log instead of failing silently, matching generate_transcript().
	        print(f"[ERROR] Unexpected exception creating vector store: {e}")
	        traceback.print_exc()
	        return None
	    return vector_store_db

	def generate_summary(trans):
	    """Ask the Bytez-hosted chat model for a short summary of a transcript.

	    Transcripts longer than 90000 space-separated words are cut to their
	    first 85000 words before being sent. The request is attempted up to
	    four times with a doubling delay (3s, 6s, 12s) between attempts.

	    Args:
	        trans: Transcript text to summarise.

	    Returns:
	        The summary text, or None when the input could not be prepared or
	        no attempt produced a usable response.
	    """
	    try:
	        model = sdk.model("openai/gpt-4o")
	        words = trans.split(" ")
	        if len(words) > 90000:
	            trans = " ".join(words[:85000])
	    except Exception as e:
	        print(f"[ERROR] Could not prepare summary request: {e}")
	        return None

	    Inp = [{"role": "system", "content": "You are a youtube transcipt sammurizer. Sammurize the transcript under 100 words"}, {"role":"user", "content":trans}]

	    max_attempts = 4
	    time_to_sleep = 3
	    for attempt in range(1, max_attempts + 1):
	        res = model.run(Inp)
	        # A usable response is assumed to be a 3-item list whose first item is
	        # the message dict (the shape the original check relied on). An error
	        # response without "content" now triggers a retry instead of raising.
	        if (
	            isinstance(res, list)
	            and len(res) == 3
	            and isinstance(res[0], dict)
	            and "content" in res[0]
	        ):
	            return res[0]["content"]
	        if attempt < max_attempts:
	            # Doubling backoff; squaring the delay grew to 6561s by the
	            # fourth failure, and there is no point sleeping after the last try.
	            time.sleep(time_to_sleep)
	            time_to_sleep *= 2
	    return None

	import traceback

	def setter(link):
	    """Run the submit flow for a YouTube link and stream UI updates.

	    This is a generator used as a Gradio event handler. Each yielded tuple
	    has eight items, in the order of the ``outputs`` list it is wired to:
	    first_page, loading_page, chat_page, wrong_link_page, cc_not_enabled,
	    normal_error, summary, vs.

	    Flow: show the loading page, extract the video ID, fetch the transcript,
	    build the vector store, generate the summary, then show the chat page.
	    On any failure the matching error row is shown together with the link
	    input row, so the user can actually try again.

	    Args:
	        link: The text entered in the YouTube link textbox.

	    Yields:
	        Tuples of six visibility updates followed by the summary text and
	        the vector store (both "" until the flow succeeds).
	    """
	    page_order = ("first", "loading", "chat", "wrong_link", "no_captions", "error")

	    def pages(*visible, summary="", store=""):
	        # Build one full output tuple: only the named rows are visible.
	        updates = [gr.update(visible=(name in visible)) for name in page_order]
	        return (*updates, summary, store)

	    print(f"[INFO] Received link: {link}")
	    yield pages("loading")

	    try:
	        video_id = video_id_extractor(link)
	        print(f"[INFO] Extracted video ID: {video_id}")
	        if not video_id:
	            print("[ERROR] Invalid video link")
	            yield pages("first", "wrong_link")
	            return

	        transcript = generate_transcript(video_id)
	        print(f"[INFO] Transcript length: {len(transcript) if transcript else 0}")
	        if not transcript:
	            print("[ERROR] Transcript generation failed")
	            yield pages("first", "no_captions")
	            return

	        vectorstore = create_and_save_vs(transcript)
	        if not vectorstore:
	            print("[ERROR] Vectorstore creation failed")
	            yield pages("first", "error")
	            return
	        # Only report success once the failure check has passed.
	        print("[INFO] Vectorstore created")

	        summary = generate_summary(transcript)
	        print(f"[INFO] Summary generated: {summary[:80] if summary else None}")
	        if not summary:
	            print("[ERROR] Summary generation failed")
	            yield pages("first", "error")
	            return

	        yield pages("chat", summary=summary, store=vectorstore)

	    except Exception as e:
	        print("[EXCEPTION in setter]:", e)
	        traceback.print_exc()
	        yield pages("first", "error")

	def execute(vec, query):
	    """Answer a question about the video using retrieved transcript chunks.

	    The three chunks most similar to *query* are fetched from the vector
	    store and passed, together with the query, to the chat model.

	    Args:
	        vec: Vector store held in the UI state (built by create_and_save_vs).
	        query: The user's question.

	    Returns:
	        A 3-tuple for the outputs (answer, chat_page, normal_error): the
	        answer text with the chat page kept visible on success, or an empty
	        answer with the chat page hidden and the error row shown on failure.
	    """
	    try:
	        matches = vec.similarity_search(query, k=3)
	        result = "".join(f"\n{doc.page_content}" for doc in matches)
	        model = sdk.model("openai/gpt-4o")
	        # The separator is a plain "|"; the previous "\|" was an invalid escape
	        # sequence that also sent a stray backslash to the model.
	        inp = [{"role": "system", "content": "You are a helpful assistant - you will be asked a query and provided with a context. You have to answer that query based on the provided context - do not make things up. Do not reveal the whole context, answer as like you already knew the context"}, {"role":"user", "content":f"query: {query} | context: {result}"}]
	        res = model.run(inp)
	        return res[0]['content'], gr.update(visible=True), gr.update(visible=False)
	    except Exception as e:
	        # Log the cause instead of failing silently, matching setter().
	        print("[EXCEPTION in execute]:", e)
	        traceback.print_exc()
	        return "", gr.update(visible=False), gr.update(visible=True)

	# ---------------------------------------------------------------------------
	# UI definition: a single gr.Blocks app with a Soft theme and custom CSS.
	# The layout is a set of rows that are shown/hidden by the event handlers:
	# first_page (link input), loading_page, chat_page, and three error rows.
	# ---------------------------------------------------------------------------
	with gr.Blocks(
	theme=gr.themes.Soft(
	primary_hue="blue",
	secondary_hue="indigo",
	),
	css="""
	/* Global Styles */
	.gradio-container {
	font-family: 'Inter', 'Segoe UI', sans-serif !important;
	max-width: 1200px !important;
	margin: 0 auto !important;
	}

	/* Header Branding */
	.header-brand {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	padding: 2rem;
	border-radius: 16px;
	margin-bottom: 2rem;
	box-shadow: 0 10px 40px rgba(102, 126, 234, 0.3);
	animation: fadeInDown 0.8s ease-out;
	}

	.header-brand h1 {
	color: white;
	font-size: 2.5rem;
	font-weight: 700;
	margin: 0;
	text-shadow: 2px 2px 4px rgba(0,0,0,0.2);
	}

	.header-brand p {
	color: rgba(255,255,255,0.95);
	font-size: 1.1rem;
	margin: 0.5rem 0 0 0;
	}

	/* Footer Branding */
	.footer-brand {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	padding: 1.5rem;
	border-radius: 12px;
	margin-top: 2rem;
	text-align: center;
	box-shadow: 0 -5px 20px rgba(102, 126, 234, 0.2);
	}

	.footer-brand p {
	color: white;
	margin: 0.3rem 0;
	font-size: 0.95rem;
	}

	.footer-brand a {
	color: #ffd700;
	text-decoration: none;
	font-weight: 600;
	transition: all 0.3s ease;
	}

	.footer-brand a:hover {
	color: #fff;
	text-shadow: 0 0 10px rgba(255,255,255,0.5);
	}

	/* Main Title Animation */
	.main-title {
	background: linear-gradient(90deg, #667eea, #764ba2, #667eea);
	background-size: 200% auto;
	color: white;
	padding: 1.5rem;
	border-radius: 12px;
	text-align: center;
	font-size: 1.8rem;
	font-weight: 600;
	margin-bottom: 2rem;
	box-shadow: 0 8px 32px rgba(102, 126, 234, 0.4);
	animation: gradientShift 3s ease infinite, fadeIn 1s ease-out;
	}

	/* Button Styles */
	.gr-button {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
	border: none !important;
	color: white !important;
	font-weight: 600 !important;
	padding: 12px 32px !important;
	border-radius: 8px !important;
	transition: all 0.3s ease !important;
	box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4) !important;
	text-transform: uppercase;
	letter-spacing: 0.5px;
	}

	.gr-button:hover {
	transform: translateY(-2px) !important;
	box-shadow: 0 6px 25px rgba(102, 126, 234, 0.6) !important;
	}

	.gr-button:active {
	transform: translateY(0px) !important;
	}

	/* Input Fields */
	.gr-textbox, .gr-text-input {
	border-radius: 8px !important;
	border: 2px solid #e0e7ff !important;
	transition: all 0.3s ease !important;
	}

	.gr-textbox:focus, .gr-text-input:focus {
	border-color: #667eea !important;
	box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;
	}

	/* Loading Animation */
	.loading-container {
	text-align: center;
	padding: 3rem;
	}

	.loading-text {
	font-size: 1.5rem;
	color: #667eea;
	animation: pulse 1.5s ease-in-out infinite;
	}

	/* Error Messages */
	.error-message {
	background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
	color: white;
	padding: 1.5rem;
	border-radius: 12px;
	text-align: center;
	font-size: 1.3rem;
	font-weight: 600;
	box-shadow: 0 8px 32px rgba(245, 87, 108, 0.3);
	animation: shake 0.5s ease-in-out;
	}

	/* Success/Summary Box */
	.summary-box {
	background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%);
	padding: 1.5rem;
	border-radius: 12px;
	margin-bottom: 1.5rem;
	box-shadow: 0 8px 24px rgba(168, 237, 234, 0.3);
	animation: fadeInUp 0.6s ease-out;
	}

	/* Chat Section */
	.chat-section {
	animation: fadeInUp 0.8s ease-out;
	}

	/* Animations */
	@keyframes fadeIn {
	from {
	opacity: 0;
	}
	to {
	opacity: 1;
	}
	}

	@keyframes fadeInDown {
	from {
	opacity: 0;
	transform: translateY(-30px);
	}
	to {
	opacity: 1;
	transform: translateY(0);
	}
	}

	@keyframes fadeInUp {
	from {
	opacity: 0;
	transform: translateY(30px);
	}
	to {
	opacity: 1;
	transform: translateY(0);
	}
	}

	@keyframes pulse {
	0%, 100% {
	opacity: 1;
	}
	50% {
	opacity: 0.5;
	}
	}

	@keyframes shake {
	0%, 100% { transform: translateX(0); }
	25% { transform: translateX(-10px); }
	75% { transform: translateX(10px); }
	}

	@keyframes gradientShift {
	0% {
	background-position: 0% 50%;
	}
	50% {
	background-position: 100% 50%;
	}
	100% {
	background-position: 0% 50%;
	}
	}

	/* Responsive Design */
	@media (max-width: 768px) {
	.header-brand h1 {
	font-size: 1.8rem;
	}
	.main-title {
	font-size: 1.3rem;
	}
	}
	"""
	) as ui:
	# Header Branding
	gr.HTML("""
	<div class="header-brand">
	<h1>🎓 AI YouTube Study Assistant</h1>
	<p>Transform lengthy videos into concise knowledge</p>
	</div>
	""")

	# Per-session state: written by setter() (last output) and read by execute()
	# (first input), i.e. it carries the vector store between the two handlers.
	vs = gr.State()
	gr.HTML('<div class="main-title">📹 Why watch long YouTube videos when you could study from AI?</div>')

	# Landing row: the only row visible at start-up; takes the YouTube link.
	with gr.Row(visible=True) as first_page:
	youtube_link = gr.Textbox(
	label="Enter the youtube link here: ",
	lines=2,
	placeholder="https://www.youtube.com/watch?v=..."
	)
	submit_button = gr.Button("SUBMIT!")

	# Chat row: shows the generated summary plus a question/answer form.
	with gr.Row(visible=False) as chat_page:
	with gr.Column():
	summary = gr.Markdown(elem_classes="summary-box")
	gr.Markdown("### 💬 Now ask any question about the video:")
	ques = gr.Textbox(
	label="Enter the question here: ",
	lines=2,
	placeholder="What is the main topic of this video?"
	)
	submit_answer = gr.Button("SUBMIT!")
	answer = gr.TextArea(label="ANSWER")

	# Error row shown by setter() when no video ID could be extracted.
	with gr.Row(visible=False) as wrong_link_page:
	gr.HTML('<div class="error-message">❌ Sorry, your link wasn\'t correct. Please try again!</div>')

	# Error row shown by setter() when no transcript could be fetched.
	with gr.Row(visible=False) as cc_not_enabled:
	gr.HTML('<div class="error-message">⚠️ The link you provided was either not valid or subtitles weren\'t enabled in that video</div>')

	# Row shown by setter() while the link is being processed.
	with gr.Row(visible=False) as loading_page:
	gr.HTML('<div class="loading-container"><div class="loading-text">⏳ Loading... Please Wait</div></div>')

	# Generic error row used by setter() and execute() for all other failures.
	with gr.Row(visible=False) as normal_error:
	gr.HTML('<div class="error-message">😔 SORRY, SOME ERROR OCCURRED. PLEASE TRY AGAIN LATER</div>')

	# Footer Branding
	gr.HTML("""
	<div class="footer-brand">
	<p><strong>Developed by Darsh Tayal</strong></p>
	<p>📧 <a href="mailto:darshtayal8@gmail.com">darshtayal8@gmail.com</a></p>
	<p style="margin-top: 1rem; font-size: 0.85rem; opacity: 0.9;">© 2024 All Rights Reserved</p>
	</div>
	""")

	# Event wiring. The order of each outputs list must match the order of the
	# values yielded by setter() / returned by execute().
	submit_button.click(setter, inputs=[youtube_link], outputs=[first_page, loading_page, chat_page, wrong_link_page, cc_not_enabled, normal_error, summary, vs])
	submit_answer.click(execute, inputs=[vs, ques], outputs=[answer, chat_page, normal_error])

	# Start the Gradio server as soon as this module is executed.
	ui.launch()