# ============================================================
# YouTube RAG Q&A System — Production-Quality Colab Notebook
# Author   : Your Name
# Model    : Groq LLaMA-3.3-70B-Versatile (128K context)
# Embedder : all-MiniLM-L6-v2 (Sentence-Transformers, free)
# Vector DB: FAISS (Facebook AI, free, CPU)
# UI       : Gradio 4.x
# ============================================================

# ─────────────────────────────────────────────────────────────
# MODULE 0 ❯ INSTALLATION
# Run this cell once. Restart runtime after it finishes.
# ─────────────────────────────────────────────────────────────
# !pip install -q \
#     gradio \
#     youtube-transcript-api \
#     sentence-transformers \
#     faiss-cpu \
#     groq \
#     langchain-text-splitters \
#     python-dotenv

# ─────────────────────────────────────────────────────────────
# MODULE 1 ❯ IMPORTS & CONFIGURATION
# All third-party imports live here.
# API key is read from Colab Secrets (preferred) or env var.
# ─────────────────────────────────────────────────────────────
import os
import re
import logging
from typing import Optional

# ── UI framework ─────────────────────────────────────────────
import gradio as gr

# ── YouTube transcript (free, no API key required) ───────────
from youtube_transcript_api import (
    YouTubeTranscriptApi,
    TranscriptsDisabled,
    NoTranscriptFound,
    VideoUnavailable,
)

# ── Embedding model (local, runs on CPU) ─────────────────────
from sentence_transformers import SentenceTransformer

# ── Text splitting ───────────────────────────────────────────
from langchain_text_splitters import RecursiveCharacterTextSplitter

# ── Numerical / vector DB ────────────────────────────────────
import numpy as np
import faiss

# ── Groq LLM client ──────────────────────────────────────────
from groq import Groq

# ── Logging — shows clean status in Colab output ─────────────
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(message)s",
    datefmt="%H:%M:%S",
)
log = logging.getLogger("rag")

# ── API key ──────────────────────────────────────────────────
# Option A (recommended in Colab): use the Secrets panel (🔑 left sidebar),
#   key name → GROQ_API_KEY
# Option B: set the GROQ_API_KEY environment variable.
try:
    from google.colab import userdata  # type: ignore
    GROQ_API_KEY = userdata.get("GROQ_API_KEY")
except Exception:
    GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")

if not GROQ_API_KEY:
    raise EnvironmentError(
        "⚠️ GROQ_API_KEY not found. "
        "Add it via Colab Secrets (🔑) or set os.environ['GROQ_API_KEY']."
    )

# ── Model identifiers & tuning knobs ─────────────────────────
GROQ_MODEL = "llama-3.3-70b-versatile"  # 128K context, strong open-weights model on Groq
EMBED_MODEL = "all-MiniLM-L6-v2"        # 384-dim, fast, free, CPU-friendly
CHUNK_SIZE = 500        # characters per chunk (splitter measures with len(), not tokens)
CHUNK_OVERLAP = 50      # character overlap to preserve context across chunks
TOP_K = 4               # how many chunks to retrieve per query
MAX_NEW_TOKENS = 1024   # LLM answer budget

# ─────────────────────────────────────────────────────────────
# MODULE 2 ❯ MODEL INITIALISATION
# Load the embedding model once at startup so every call is fast.
# The Groq client is stateless — one instance is enough.
# ─────────────────────────────────────────────────────────────
log.info("Loading embedding model …")
embedding_model = SentenceTransformer(EMBED_MODEL)
log.info("Embedding model ready ✓")

groq_client = Groq(api_key=GROQ_API_KEY)
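
# ── Optional sanity check (illustrative sketch) ──────────────
# Not part of the pipeline: a minimal probe to confirm the embedder
# loaded correctly before wiring up the UI. The test sentence is a
# made-up example. Uncomment to run.
# _probe = embedding_model.encode(["hello world"])
# assert _probe.shape == (1, 384), "all-MiniLM-L6-v2 should emit 384-dim vectors"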

# ── Global vector store ──────────────────────────────────────
# These are module-level globals so every Gradio callback
# can read/write them without passing objects around.
vector_store: Optional[faiss.IndexFlatIP] = None  # FAISS inner-product index
chunks_store: list[str] = []                      # parallel list of text chunks
current_video_title: str = ""                     # shown in the UI

# ─────────────────────────────────────────────────────────────
# MODULE 3 ❯ YOUTUBE TRANSCRIPT FETCHER
# ─────────────────────────────────────────────────────────────
def extract_video_id(url: str) -> str:
    """
    Extract the YouTube video ID from any common URL format.

    Handles:
        https://www.youtube.com/watch?v=VIDEO_ID
        https://youtu.be/VIDEO_ID
        https://youtube.com/shorts/VIDEO_ID
        https://www.youtube.com/embed/VIDEO_ID
    """
    patterns = [
        r"(?:v=)([A-Za-z0-9_-]{11})",
        r"youtu\.be/([A-Za-z0-9_-]{11})",
        r"shorts/([A-Za-z0-9_-]{11})",
        r"embed/([A-Za-z0-9_-]{11})",
    ]
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    raise ValueError(f"Could not extract video ID from URL: {url}")


def get_transcript(url: str) -> tuple[str, str]:
    """
    Fetch the transcript for a YouTube video.

    Returns
    -------
    (transcript_text, status_message)
    On error: (empty string, error description)
    """
    try:
        video_id = extract_video_id(url)
        log.info(f"Fetching transcript for video ID: {video_id}")

        api = YouTubeTranscriptApi()
        # .fetch() returns a FetchedTranscript object (v1.x API)
        transcript_data = api.fetch(video_id)

        # Join all text segments into one continuous string
        full_text = " ".join(
            segment.text.strip()
            for segment in transcript_data
            if segment.text.strip()
        )

        word_count = len(full_text.split())
        log.info(f"Transcript fetched — {word_count:,} words")
        return full_text, f"✅ Transcript fetched ({word_count:,} words)"

    except VideoUnavailable:
        return "", "❌ Video is unavailable or private."
    except TranscriptsDisabled:
        return "", "❌ Transcripts are disabled for this video."
    except NoTranscriptFound:
        return "", "❌ No transcript found. Try a video with auto-generated captions."
    except ValueError as e:
        return "", f"❌ Invalid URL — {e}"
    except Exception as e:
        log.exception("Unexpected error fetching transcript")
        return "", f"❌ Unexpected error: {e}"


# ─────────────────────────────────────────────────────────────
# MODULE 4 ❯ VECTOR DATABASE BUILDER
# Splits transcript → chunks → embeddings → FAISS index
# ─────────────────────────────────────────────────────────────
def build_vector_store(transcript: str) -> str:
    """
    Convert a raw transcript into a FAISS vector index.

    Steps
    -----
    1. Split text into overlapping chunks via RecursiveCharacterTextSplitter
    2. Encode each chunk with the embedding model
    3. Build a FAISS IndexFlatIP and add the vectors
    4. Store everything in module-level globals

    Returns
    -------
    Status message string.
    """
    global vector_store, chunks_store

    # ── Step 1: Chunk ─────────────────────────────────────────
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE,
        chunk_overlap=CHUNK_OVERLAP,
        length_function=len,  # character-based length
        separators=["\n\n", "\n", ". ", " ", ""],
    )
    chunks = splitter.split_text(transcript)
    log.info(f"Created {len(chunks)} chunks")

    if not chunks:
        return "❌ No chunks created — transcript may be too short."
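
    # Worked example (illustrative, exact counts depend on separator
    # positions): with CHUNK_SIZE=500 and CHUNK_OVERLAP=50 the splitter
    # advances ~450 characters per chunk, so a 2,000-character transcript
    # yields roughly ceil((2000 - 50) / 450) ≈ 5 chunks, each sharing
    # ~50 characters with its neighbour so sentences are not cut cold
    # at chunk boundaries.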
    # ── Step 2: Embed ─────────────────────────────────────────
    log.info("Encoding chunks …")
    embeddings = embedding_model.encode(
        chunks,
        show_progress_bar=False,
        batch_size=64,
        normalize_embeddings=True,  # cosine similarity via inner product
    )

    # ── Step 3: Index ─────────────────────────────────────────
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatIP(dimension)  # inner product → cosine on normalised vecs
    index.add(np.array(embeddings, dtype=np.float32))

    # ── Step 4: Persist to globals ────────────────────────────
    vector_store = index
    chunks_store = chunks

    log.info(f"FAISS index built — {index.ntotal} vectors, dim={dimension}")
    return f"✅ Indexed {len(chunks)} chunks into FAISS (dim={dimension})"


# ─────────────────────────────────────────────────────────────
# MODULE 5 ❯ RETRIEVER
# Similarity search: query → top-k relevant chunks
# ─────────────────────────────────────────────────────────────
def retrieve_context(query: str, top_k: int = TOP_K) -> str:
    """
    Retrieve the most semantically relevant chunks for a given query.

    Parameters
    ----------
    query : user's natural-language question
    top_k : number of chunks to return

    Returns
    -------
    String of concatenated retrieved chunks, separated by "---" dividers.
    """
    if vector_store is None or not chunks_store:
        return ""

    # Embed and normalise the query (same preprocessing as the chunks)
    query_vec = embedding_model.encode(
        [query],
        normalize_embeddings=True,
    )

    # FAISS inner-product search (cosine on normalised vectors)
    scores, indices = vector_store.search(
        np.array(query_vec, dtype=np.float32), top_k
    )

    retrieved = []
    for score, idx in zip(scores[0], indices[0]):
        if idx == -1:  # FAISS returns -1 when fewer than top_k vectors exist
            continue
        retrieved.append(f"[Relevance: {score:.3f}]\n{chunks_store[idx]}")

    log.info(f"Retrieved {len(retrieved)} chunks for query: '{query[:60]}…'")
    return "\n\n---\n\n".join(retrieved)
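
# ── Retrieval sanity check (illustrative sketch) ─────────────
# Once a video has been indexed, retrieval can be exercised on its
# own; useful when tuning TOP_K or CHUNK_SIZE. The query below is a
# made-up example. Uncomment after build_vector_store() has run.
# print(retrieve_context("What is the main topic of the video?", top_k=2))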

# ─────────────────────────────────────────────────────────────
# MODULE 6 ❯ LLM — GROQ LLAMA 3.3-70B
# Augment + Generate step of RAG
# ─────────────────────────────────────────────────────────────
SYSTEM_PROMPT = """\
You are a precise, helpful AI assistant that answers questions about YouTube videos \
based strictly on the provided transcript context.

Rules:
- Answer ONLY from the context provided.
- If the context does not contain enough information, say so clearly.
- Be concise but complete.
- Use bullet points for lists or steps.
- Never fabricate information not present in the context.
"""


def generate_answer(query: str) -> str:
    """
    Full RAG generate step:
      1. Retrieve relevant context from FAISS
      2. Build an augmented prompt
      3. Send it to Groq LLaMA-3.3-70B
      4. Return the model's response

    Parameters
    ----------
    query : user's question

    Returns
    -------
    The model's answer as a string.
    """
    context = retrieve_context(query)
    if not context:
        return "⚠️ No relevant context found in the transcript for your question."

    user_message = f"""\
Context from the video transcript:

{context}

---

Question: {query}

Answer:"""

    try:
        response = groq_client.chat.completions.create(
            model=GROQ_MODEL,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_message},
            ],
            max_tokens=MAX_NEW_TOKENS,
            temperature=0.2,  # low temperature → factual, grounded answers
            top_p=0.9,
        )
        answer = response.choices[0].message.content.strip()
        log.info("LLM response received")
        return answer

    except Exception as e:
        log.exception("Groq API error")
        return f"❌ LLM error: {e}"


# ─────────────────────────────────────────────────────────────
# MODULE 7 ❯ ORCHESTRATION PIPELINE
# Ties transcript fetch + vector-store build together.
# Called by the Gradio "Process Video" button.
# ─────────────────────────────────────────────────────────────
def process_video(url: str) -> tuple[str, str, str]:
    """
    Full ingestion pipeline triggered by the UI.

    Returns
    -------
    (transcript_preview, index_status, combined_status)
    suitable for Gradio outputs.
    """
    global current_video_title

    if not url or not url.strip():
        return "", "", "⚠️ Please enter a YouTube URL."

    # ── Phase 1: Fetch transcript ─────────────────────────────
    transcript, fetch_status = get_transcript(url.strip())
    if not transcript:
        return "", "", fetch_status

    # ── Phase 2: Build vector store ───────────────────────────
    index_status = build_vector_store(transcript)

    # ── Phase 3: Summary line for UI ──────────────────────────
    combined = (
        f"{fetch_status}\n{index_status}\n\n"
        "💬 Video is ready — switch to the Chat tab!"
    )

    # Show the first 2,000 characters in the transcript preview box
    preview = transcript[:2000] + (" …[truncated]" if len(transcript) > 2000 else "")

    return preview, index_status, combined
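
# ── End-to-end smoke test (illustrative sketch) ──────────────
# Exercises the full ingest → answer path without the UI. The URL is a
# placeholder; substitute any public video with captions enabled.
# Uncomment to run.
# _, _, _status = process_video("https://www.youtube.com/watch?v=VIDEO_ID")
# print(_status)
# print(generate_answer("Summarise the video in three bullet points."))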

# ─────────────────────────────────────────────────────────────
# MODULE 8 ❯ CHAT HANDLER
# Called on every user message in the Chat tab.
# ─────────────────────────────────────────────────────────────
def chat_with_video(
    user_query: str,
    history: list[tuple[str, str]],
) -> tuple[list[tuple[str, str]], str]:
    """
    Handle a single chat turn.

    Parameters
    ----------
    user_query : the question typed by the user
    history    : Gradio chat history (list of (user, assistant) pairs)

    Returns
    -------
    Updated history, plus an empty string (clears the input box).
    """
    if not user_query.strip():
        return history, ""

    if vector_store is None:
        history.append(
            (user_query, "⚠️ Please process a video first on the **Process Video** tab.")
        )
        return history, ""

    answer = generate_answer(user_query)
    history.append((user_query, answer))
    return history, ""


# ─────────────────────────────────────────────────────────────
# MODULE 9 ❯ GRADIO USER INTERFACE
# Professional two-tab layout:
#   Tab 1 — Process Video (URL input, status, transcript preview)
#   Tab 2 — Chat          (conversation window + input)
# ─────────────────────────────────────────────────────────────
CSS = """
/* ── Global ── */
#app-header { text-align: center; margin-bottom: 0.5rem; }
#status-box textarea {
    font-size: 0.85rem;
    color: var(--body-text-color);
    background: var(--input-background-fill);
}
#transcript-box textarea { font-size: 0.8rem; }
#chat-window { height: 480px; }
/* ── Send on Enter ── */
#chat-input textarea { resize: none; }
"""

with gr.Blocks(
    title="YouTube RAG Q&A",
    theme=gr.themes.Soft(
        primary_hue="indigo",
        neutral_hue="slate",
        font=gr.themes.GoogleFont("Inter"),
    ),
    css=CSS,
) as app:

    # ── Header ────────────────────────────────────────────────
    gr.Markdown(
        """
        # 🎥 YouTube RAG Q&A
        **Paste any YouTube URL → transcribe → chat with the video using AI**

        *Powered by [Groq](https://groq.com) · LLaMA 3.3-70B · FAISS · Sentence-Transformers*
        """,
        elem_id="app-header",
    )

    # ── Tab 1: Process Video ──────────────────────────────────
    with gr.Tab("📥 Process Video", id="tab-process"):
        with gr.Row():
            url_input = gr.Textbox(
                label="YouTube URL",
                placeholder="https://www.youtube.com/watch?v=...",
                scale=4,
            )
            process_btn = gr.Button(
                "▶ Transcribe & Index",
                variant="primary",
                scale=1,
                min_width=180,
            )

        status_output = gr.Textbox(
            label="Pipeline Status",
            interactive=False,
            lines=4,
            elem_id="status-box",
        )

        with gr.Accordion("📄 Transcript Preview (first 2000 chars)", open=False):
            transcript_output = gr.Textbox(
                label="Raw transcript",
                interactive=False,
                lines=12,
                elem_id="transcript-box",
            )

        # ── Wiring ────────────────────────────────────────────
        # Hidden sink for process_video's standalone index-status value,
        # declared up front instead of inline in the outputs list.
        index_status_sink = gr.Textbox(visible=False)

        process_btn.click(
            fn=process_video,
            inputs=url_input,
            outputs=[transcript_output, index_status_sink, status_output],
        )

    # ── Tab 2: Chat ───────────────────────────────────────────
    with gr.Tab("💬 Chat with Video", id="tab-chat"):
        chatbot = gr.Chatbot(
            label="Conversation",
            bubble_full_width=False,
            height=480,
            elem_id="chat-window",
        )

        with gr.Row():
            chat_input = gr.Textbox(
                placeholder="Ask anything about the video…",
                label="",
                scale=5,
                elem_id="chat-input",
                autofocus=True,
            )
            send_btn = gr.Button("Send ➤", variant="primary", scale=1, min_width=100)

        clear_btn = gr.Button("🗑 Clear conversation", variant="secondary", size="sm")

        # ── Wiring ────────────────────────────────────────────
        # Submit on button click or Enter key
        send_btn.click(
            fn=chat_with_video,
            inputs=[chat_input, chatbot],
            outputs=[chatbot, chat_input],
        )
        chat_input.submit(
            fn=chat_with_video,
            inputs=[chat_input, chatbot],
            outputs=[chatbot, chat_input],
        )
        clear_btn.click(fn=lambda: [], outputs=chatbot)

    # ── Footer ────────────────────────────────────────────────
    gr.Markdown(
        ""  # footer text truncated in the source
    )
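
# ─────────────────────────────────────────────────────────────
# LAUNCH (added sketch; the original notebook is truncated above)
# app.launch() is the standard way to start a Gradio app in Colab;
# debug=True keeps the cell attached and surfaces tracebacks inline.
# ─────────────────────────────────────────────────────────────
app.launch(debug=True)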