# app.py — YouTube AI Expert (RAG): Gradio + FAISS + Groq
# Revision 587e2e0 (updated by agnixcode)
import os
import re

import faiss
import gradio as gr
import numpy as np
from groq import Groq
from langchain_text_splitters import RecursiveCharacterTextSplitter
# NOTE: only SentenceTransformer is a top-level export; importing
# `Transformer` from the package root raises ImportError (it lives in
# sentence_transformers.models), so it is removed here.
from sentence_transformers import SentenceTransformer
from youtube_transcript_api import YouTubeTranscriptApi

# ===============================
# CONFIGURATION
# ===============================
# Load Groq API Key from the environment (Hugging Face Secrets).
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# Client is None when the key is missing; callers must check before use.
groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None

# Shared embedding model, used for both chunk and query embeddings.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Module-level state holding the "brain" of the currently loaded video.
vector_store = None  # FAISS index over chunk embeddings
chunks_store = []    # raw text chunks, row-aligned with the index
# ===============================
# CORE FUNCTIONS
# ===============================
def extract_video_id(url):
    """Return the 11-character YouTube video ID found in *url*, or None.

    Recognizes standard watch URLs, youtu.be short links, embeds,
    shorts, and shared links.
    """
    pattern = r"(?:v=|\/|be\/|embed\/|shorts\/)([0-9A-Za-z_-]{11})"
    found = re.search(pattern, url)
    if found is None:
        return None
    return found.group(1)
def get_transcript(url):
    """Fetch the full transcript text for a YouTube video URL.

    Returns the transcript as a single space-joined string, or a string
    starting with "ERROR:" describing the failure (invalid URL, no
    transcript available, network problem, ...).
    """
    video_id = extract_video_id(url)
    if video_id is None:
        return "ERROR: Invalid YouTube URL."
    try:
        # Static helper on the imported class fetches the caption track.
        segments = YouTubeTranscriptApi.get_transcript(video_id)
        return " ".join(segment['text'] for segment in segments)
    except Exception as e:
        return f"ERROR: Could not retrieve transcript. (Details: {str(e)})"
def build_vector_index(text):
    """Chunk *text*, embed the chunks, and build a FAISS L2 index.

    Populates the module-level `vector_store` (FAISS index) and
    `chunks_store` (chunk texts, row-aligned with the index).
    """
    global vector_store, chunks_store

    # Split the transcript into overlapping chunks for retrieval.
    splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=60)
    chunks_store = splitter.split_text(text)

    # Embed every chunk with the shared sentence-transformer model;
    # FAISS requires float32 input.
    vectors = np.array(embedding_model.encode(chunks_store)).astype('float32')

    # Flat (exact) L2 index sized to the embedding dimensionality.
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    vector_store = index
def get_ai_response(user_query):
    """Answer *user_query* via RAG over the indexed video transcript.

    Retrieves the top-3 most similar transcript chunks from the FAISS
    index and asks Groq's Llama model to answer using that context.
    Always returns a user-facing string (answer or error message).
    """
    if vector_store is None or not chunks_store:
        return "Please load a video first."
    # Fix: without this guard, a missing API key leaves groq_client as
    # None and the call below raised AttributeError, surfacing as the
    # cryptic "AI Error: 'NoneType' object has no attribute 'chat'".
    if groq_client is None:
        return "AI Error: Groq API key is not configured."

    # Embed the query and retrieve the 3 nearest chunks.
    query_embedding = embedding_model.encode([user_query])
    D, I = vector_store.search(np.array(query_embedding).astype('float32'), k=3)
    # FAISS pads missing neighbors with -1 when fewer than k vectors exist.
    context = "\n".join([chunks_store[i] for i in I[0] if i != -1])

    prompt = f"""Use the following video transcript context to answer the question.
If the answer isn't in the context, say you don't know based on the video.
Context: {context}
Question: {user_query}
Answer:"""
    try:
        completion = groq_client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[{"role": "user", "content": prompt}]
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"AI Error: {str(e)}"
# ===============================
# UI LOGIC
# ===============================
def process_video_step(url):
    """Gradio handler: fetch a transcript, index it, and report status.

    Returns (transcript preview, status message) for the Setup tab.
    """
    transcript = get_transcript(url)
    # get_transcript signals failure via an "ERROR"-prefixed string.
    if transcript.startswith("ERROR"):
        return transcript, "❌ Failed"
    build_vector_index(transcript)
    preview_text = transcript[:1000] + "..."
    return preview_text, "✅ Video Indexed! Go to Chat tab."
def chat_step(message, history):
    """Gradio handler: run one chat turn and clear the input textbox.

    Appends a (user message, reply) pair to *history* and returns the
    updated history plus an empty string to reset the input field.
    """
    if GROQ_API_KEY:
        reply = get_ai_response(message)
    else:
        reply = "Error: Groq API Key missing in Secrets."
    history.append((message, reply))
    return history, ""
# ===============================
# GRADIO INTERFACE
# ===============================
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📺 YouTube AI Expert (RAG)")
    with gr.Tabs():
        # Tab 1: paste a URL and build the transcript index.
        with gr.Tab("1. Setup Video"):
            video_url = gr.Textbox(label="YouTube URL", placeholder="https://www.youtube.com/watch?v=...")
            index_button = gr.Button("Process Video", variant="primary")
            status_box = gr.Textbox(label="Status")
            preview_box = gr.Textbox(label="Transcript Preview (First 1000 chars)", lines=5)
            index_button.click(process_video_step, inputs=video_url, outputs=[preview_box, status_box])
        # Tab 2: chat against the indexed transcript.
        with gr.Tab("2. Chat with Video"):
            chat_window = gr.Chatbot(height=400)
            question_box = gr.Textbox(label="Ask anything about the video...")
            gr.ClearButton([question_box, chat_window])
            # Submitting a question updates the chat and clears the box.
            question_box.submit(chat_step, [question_box, chat_window], [chat_window, question_box])

if __name__ == "__main__":
    demo.launch()