# ============================================================
# YouTube RAG Q&A System — Production-Quality Colab Notebook
# Author : Your Name
# Model : Groq LLaMA-3.3-70B-Versatile (128K context)
# Embedder: all-MiniLM-L6-v2 (Sentence-Transformers, free)
# Vector DB: FAISS (Facebook AI, free, CPU)
# UI : Gradio 4.x
# ============================================================
# ─────────────────────────────────────────────────────────────
# MODULE 0 ❯ INSTALLATION
# Run this cell once. Restart runtime after it finishes.
# ─────────────────────────────────────────────────────────────
# !pip install -q \
# gradio \
# youtube-transcript-api \
# sentence-transformers \
# faiss-cpu \
# groq \
# langchain-text-splitters \
# python-dotenv
# ─────────────────────────────────────────────────────────────
# MODULE 1 ❯ IMPORTS & CONFIGURATION
# All third-party imports live here.
# API key is read from Colab Secrets (preferred) or env var.
# ─────────────────────────────────────────────────────────────
import os
import re
import logging
from typing import Optional
# ── UI framework ─────────────────────────────────────────────
import gradio as gr
# ── YouTube transcript (free, no API key required) ───────────
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import (
TranscriptsDisabled,
NoTranscriptFound,
VideoUnavailable,
)
# ── Embedding model (local, runs on CPU) ─────────────────────
from sentence_transformers import SentenceTransformer
# ── Text splitting ────────────────────────────────────────────
from langchain_text_splitters import RecursiveCharacterTextSplitter
# ── Numerical / vector DB ─────────────────────────────────────
import numpy as np
import faiss
# ── Groq LLM client ───────────────────────────────────────────
from groq import Groq
# ── Logging — shows clean status in Colab output ──────────────
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s | %(levelname)s | %(message)s",
datefmt="%H:%M:%S",
)
log = logging.getLogger("rag")
# ── API key ────────────────────────────────────────────────────
# Option A (recommended in Colab): use Secrets panel (🔑 left sidebar)
#   key name → GROQ_API_KEY
try:
from google.colab import userdata # type: ignore
GROQ_API_KEY = userdata.get("GROQ_API_KEY")
except Exception:
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
if not GROQ_API_KEY:
    raise EnvironmentError(
        "⚠️ GROQ_API_KEY not found. "
        "Add it via Colab Secrets (🔑) or set os.environ['GROQ_API_KEY']."
    )
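# Option B (any environment) — a minimal sketch: set the variable yourself
# before this cell runs. The value below is a placeholder; never commit a real key.
#
#   import os
#   os.environ["GROQ_API_KEY"] = "<your-groq-api-key>"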
# ── Model identifiers ──────────────────────────────────────────
GROQ_MODEL = "llama-3.3-70b-versatile"   # 128K context; Groq's flagship open-weights model
EMBED_MODEL = "all-MiniLM-L6-v2"         # 384-dim, fast, free, CPU-friendly
CHUNK_SIZE = 500        # characters per chunk (the splitter measures len())
CHUNK_OVERLAP = 50      # character overlap to preserve context across chunks
TOP_K = 4               # how many chunks to retrieve per query
MAX_NEW_TOKENS = 1024   # LLM answer budget
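# Back-of-envelope for the settings above (assuming ~6 characters per word):
# a 10,000-word transcript is ~60,000 characters; a 500-char window sliding
# by 450 chars (500 − 50 overlap) gives roughly 60,000 / 450 ≈ 133 chunks,
# each embedded exactly once at index time.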
# ─────────────────────────────────────────────────────────────
# MODULE 2 ❯ MODEL INITIALISATION
# Load embedding model once at startup so every call is fast.
# Groq client is stateless — one instance is enough.
# ─────────────────────────────────────────────────────────────
log.info("Loading embedding model …")
embedding_model = SentenceTransformer(EMBED_MODEL)
log.info("Embedding model ready βœ“")
groq_client = Groq(api_key=GROQ_API_KEY)
# ── Global vector store ────────────────────────────────────────
# These are module-level globals so every Gradio callback
# can read/write them without passing objects around.
vector_store: Optional[faiss.Index] = None   # FAISS index (an IndexFlatIP, built below)
chunks_store: list[str] = []                 # parallel list of text chunks
current_video_title: str = ""                # reserved for the UI (not currently set)
# ─────────────────────────────────────────────────────────────
# MODULE 3 ❯ YOUTUBE TRANSCRIPT FETCHER
# ─────────────────────────────────────────────────────────────
def extract_video_id(url: str) -> str:
"""
Extract the YouTube video ID from any common URL format.
Handles:
https://www.youtube.com/watch?v=VIDEO_ID
https://youtu.be/VIDEO_ID
https://youtube.com/shorts/VIDEO_ID
https://www.youtube.com/embed/VIDEO_ID
"""
patterns = [
r"(?:v=)([A-Za-z0-9_-]{11})",
r"youtu\.be/([A-Za-z0-9_-]{11})",
r"shorts/([A-Za-z0-9_-]{11})",
r"embed/([A-Za-z0-9_-]{11})",
]
for pattern in patterns:
match = re.search(pattern, url)
if match:
return match.group(1)
raise ValueError(f"Could not extract video ID from URL: {url}")
def get_transcript(url: str) -> tuple[str, str]:
"""
Fetch the transcript for a YouTube video.
Returns
-------
(transcript_text, status_message)
On error: (empty string, error description)
"""
try:
video_id = extract_video_id(url)
log.info(f"Fetching transcript for video ID: {video_id}")
api = YouTubeTranscriptApi()
# .fetch() returns a FetchedTranscript object (updated API)
transcript_data = api.fetch(video_id)
# Join all text segments into one continuous string
full_text = " ".join(
segment.text.strip()
for segment in transcript_data
if segment.text.strip()
)
word_count = len(full_text.split())
log.info(f"Transcript fetched β€” {word_count:,} words")
return full_text, f"βœ… Transcript fetched ({word_count:,} words)"
except VideoUnavailable:
return "", "❌ Video is unavailable or private."
except TranscriptsDisabled:
return "", "❌ Transcripts are disabled for this video."
except NoTranscriptFound:
return "", "❌ No transcript found. Try a video with auto-generated captions."
except ValueError as e:
return "", f"❌ Invalid URL β€” {e}"
except Exception as e:
log.exception("Unexpected error fetching transcript")
return "", f"❌ Unexpected error: {e}"
# ─────────────────────────────────────────────────────────────
# MODULE 4 ❯ VECTOR DATABASE BUILDER
# Splits transcript → chunks → embeddings → FAISS index
# ─────────────────────────────────────────────────────────────
def build_vector_store(transcript: str) -> str:
"""
Convert a raw transcript into a FAISS vector index.
Steps
-----
1. Split text into overlapping chunks via RecursiveCharacterTextSplitter
2. Encode each chunk with the embedding model
3. Build a FAISS IndexFlatL2 and add the vectors
4. Store everything in module-level globals
Returns
-------
Status message string.
"""
global vector_store, chunks_store
# ── Step 1: Chunk ──────────────────────────────────────────
splitter = RecursiveCharacterTextSplitter(
chunk_size=CHUNK_SIZE,
chunk_overlap=CHUNK_OVERLAP,
length_function=len, # character-based length
separators=["\n\n", "\n", ". ", " ", ""],
)
chunks = splitter.split_text(transcript)
log.info(f"Created {len(chunks)} chunks")
if not chunks:
return "❌ No chunks created β€” transcript may be too short."
# ── Step 2: Embed ──────────────────────────────────────────
log.info("Encoding chunks …")
embeddings = embedding_model.encode(
chunks,
show_progress_bar=False,
batch_size=64,
normalize_embeddings=True, # cosine similarity via inner product
)
# ── Step 3: Index ──────────────────────────────────────────
dimension = embeddings.shape[1]
    index = faiss.IndexFlatIP(dimension)  # inner product → cosine on normalised vectors
index.add(np.array(embeddings, dtype=np.float32))
# ── Step 4: Persist to globals ─────────────────────────────
vector_store = index
chunks_store = chunks
log.info(f"FAISS index built β€” {index.ntotal} vectors, dim={dimension}")
return f"βœ… Indexed {len(chunks)} chunks into FAISS (dim={dimension})"
# ─────────────────────────────────────────────────────────────
# MODULE 5 ❯ RETRIEVER
# Similarity search: query → top-k relevant chunks
# ─────────────────────────────────────────────────────────────
def retrieve_context(query: str, top_k: int = TOP_K) -> str:
"""
Retrieve the most semantically relevant chunks for a given query.
Parameters
----------
query : user's natural-language question
top_k : number of chunks to return
Returns
-------
String of concatenated retrieved chunks, separated by blank lines.
"""
if vector_store is None or not chunks_store:
return ""
# Embed and normalise the query (same preprocessing as the chunks)
query_vec = embedding_model.encode(
[query],
normalize_embeddings=True,
)
# FAISS inner-product search (cosine on normalised vectors)
scores, indices = vector_store.search(
np.array(query_vec, dtype=np.float32), top_k
)
retrieved = []
for score, idx in zip(scores[0], indices[0]):
if idx == -1: # FAISS returns -1 for empty slots
continue
retrieved.append(f"[Relevance: {score:.3f}]\n{chunks_store[idx]}")
log.info(f"Retrieved {len(retrieved)} chunks for query: '{query[:60]}…'")
return "\n\n---\n\n".join(retrieved)
# ─────────────────────────────────────────────────────────────
# MODULE 6 ❯ LLM — GROQ LLAMA 3.3-70B
# Augment + Generate step of RAG
# ─────────────────────────────────────────────────────────────
SYSTEM_PROMPT = """\
You are a precise, helpful AI assistant that answers questions about YouTube videos \
based strictly on the provided transcript context.
Rules:
- Answer ONLY from the context provided.
- If the context does not contain enough information, say so clearly.
- Be concise but complete.
- Use bullet points for lists or steps.
- Never fabricate information not present in the context.
"""
def generate_answer(query: str) -> str:
"""
Full RAG generate step:
1. Retrieve relevant context from FAISS
2. Build an augmented prompt
3. Send to Groq LLaMA-3.3-70B
4. Return the model's response
Parameters
----------
query : user's question
Returns
-------
The model's answer as a string.
"""
context = retrieve_context(query)
if not context:
return "⚠️ No relevant context found in the transcript for your question."
user_message = f"""\
Context from the video transcript:
{context}
---
Question: {query}
Answer:"""
try:
response = groq_client.chat.completions.create(
model=GROQ_MODEL,
messages=[
{"role": "system", "content": SYSTEM_PROMPT},
{"role": "user", "content": user_message},
],
max_tokens=MAX_NEW_TOKENS,
            temperature=0.2,  # low temp → factual, grounded answers
top_p=0.9,
)
answer = response.choices[0].message.content.strip()
log.info("LLM response received")
return answer
except Exception as e:
log.exception("Groq API error")
return f"❌ LLM error: {e}"
# ─────────────────────────────────────────────────────────────
# MODULE 7 ❯ ORCHESTRATION PIPELINE
# Ties transcript fetch + vector store build together.
# Called by the Gradio "Process Video" button.
# ─────────────────────────────────────────────────────────────
def process_video(url: str) -> tuple[str, str, str]:
"""
Full ingestion pipeline triggered by the UI.
Returns
-------
(transcript_preview, index_status, combined_status)
suitable for Gradio outputs.
"""
    if not url or not url.strip():
        return "", "", "⚠️ Please enter a YouTube URL."
# ── Phase 1: Fetch transcript ──────────────────────────────
transcript, fetch_status = get_transcript(url.strip())
if not transcript:
return "", "", fetch_status
# ── Phase 2: Build vector store ───────────────────────────
index_status = build_vector_store(transcript)
# ── Phase 3: Summary line for UI ──────────────────────────
combined = f"{fetch_status}\n{index_status}\n\nπŸ’¬ Video is ready β€” switch to the Chat tab!"
# Show first 2000 chars in the transcript preview box
preview = transcript[:2000] + (" …[truncated]" if len(transcript) > 2000 else "")
return preview, index_status, combined
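# End-to-end ingestion from code, without the UI (the URL is illustrative):
#   preview, idx_status, combined = process_video("https://youtu.be/dQw4w9WgXcQ")
#   print(combined)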
# ─────────────────────────────────────────────────────────────
# MODULE 8 ❯ CHAT HANDLER
# Called on every user message in the Chat tab.
# ─────────────────────────────────────────────────────────────
def chat_with_video(
user_query: str,
history: list[tuple[str, str]],
) -> tuple[list[tuple[str, str]], str]:
"""
Handle a single chat turn.
Parameters
----------
user_query : the question typed by the user
history : Gradio chat history (list of (user, assistant) pairs)
Returns
-------
Updated history, empty string (clears the input box)
"""
if not user_query.strip():
return history, ""
if vector_store is None:
        history.append((user_query, "⚠️ Please process a video first on the **Process Video** tab."))
return history, ""
answer = generate_answer(user_query)
history.append((user_query, answer))
return history, ""
# ─────────────────────────────────────────────────────────────
# MODULE 9 ❯ GRADIO USER INTERFACE
# Professional two-tab layout:
#   Tab 1 — Process Video (URL input, status, transcript preview)
#   Tab 2 — Chat (conversation window + input)
# ─────────────────────────────────────────────────────────────
CSS = """
/* ── Global ── */
#app-header { text-align: center; margin-bottom: 0.5rem; }
#status-box textarea {
font-size: 0.85rem;
color: var(--body-text-color);
background: var(--input-background-fill);
}
#transcript-box textarea { font-size: 0.8rem; }
#chat-window { height: 480px; }
/* ── Send on Enter ── */
#chat-input textarea { resize: none; }
"""
with gr.Blocks(
title="YouTube RAG Q&A",
theme=gr.themes.Soft(
primary_hue="indigo",
neutral_hue="slate",
font=gr.themes.GoogleFont("Inter"),
),
css=CSS,
) as app:
# ── Header ─────────────────────────────────────────────────
    gr.Markdown(
        """
        # 🎥 YouTube RAG Q&A
        **Paste any YouTube URL → transcribe → chat with the video using AI**
        *Powered by [Groq](https://groq.com) · LLaMA 3.3-70B · FAISS · Sentence-Transformers*
        """,
        elem_id="app-header",
    )
# ── Tab 1: Process Video ────────────────────────────────────
with gr.Tab("πŸ“₯ Process Video", id="tab-process"):
with gr.Row():
url_input = gr.Textbox(
label="YouTube URL",
placeholder="https://www.youtube.com/watch?v=...",
scale=4,
)
            process_btn = gr.Button(
                "▶ Transcribe & Index",
                variant="primary",
                scale=1,
                min_width=180,
            )
status_output = gr.Textbox(
label="Pipeline Status",
interactive=False,
lines=4,
elem_id="status-box",
)
with gr.Accordion("πŸ“„ Transcript Preview (first 2000 chars)", open=False):
transcript_output = gr.Textbox(
label="Raw transcript",
interactive=False,
lines=12,
elem_id="transcript-box",
)
# ── Wiring ────────────────────────────────────────────
        # process_video returns three values; the middle one (index status) is
        # already folded into the combined status string, so it is routed to a
        # hidden, throwaway Textbox.
        process_btn.click(
            fn=process_video,
            inputs=url_input,
            outputs=[transcript_output, gr.Textbox(visible=False), status_output],
        )
# ── Tab 2: Chat ─────────────────────────────────────────────
with gr.Tab("πŸ’¬ Chat with Video", id="tab-chat"):
chatbot = gr.Chatbot(
label="Conversation",
bubble_full_width=False,
height=480,
elem_id="chat-window",
)
with gr.Row():
chat_input = gr.Textbox(
placeholder="Ask anything about the video…",
label="",
scale=5,
elem_id="chat-input",
autofocus=True,
)
send_btn = gr.Button("Send ➀", variant="primary", scale=1, min_width=100)
clear_btn = gr.Button("πŸ—‘ Clear conversation", variant="secondary", size="sm")
# ── Wiring ────────────────────────────────────────────
# Submit on button click or Enter key
send_btn.click(
fn=chat_with_video,
inputs=[chat_input, chatbot],
outputs=[chatbot, chat_input],
)
chat_input.submit(
fn=chat_with_video,
inputs=[chat_input, chatbot],
outputs=[chatbot, chat_input],
)
clear_btn.click(fn=lambda: [], outputs=chatbot)
# ── Footer ──────────────────────────────────────────────────
gr.Markdown(
"<center style='font-size:0.75rem; color:#888;'>"
"Open-source Β· No data stored Β· Transcript processed locally"
"</center>"
)
# ─────────────────────────────────────────────────────────────
# MODULE 10 ❯ LAUNCH
# ─────────────────────────────────────────────────────────────
if __name__ == "__main__":
app.launch(
debug=True, # shows tracebacks in output
share=True, # creates a public gradio.live link (great for demos)
show_error=True,
)
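# Outside Colab, a local-only launch may be preferable (standard Gradio kwargs;
# a sketch — adjust the port to taste):
#   app.launch(server_name="0.0.0.0", server_port=7860, share=False)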