# app.py — YouTube AI Expert (RAG): Gradio + FAISS + Groq
# Revision 587e2e0 (updated by agnixcode)
import os
import re

import faiss
import gradio as gr
import numpy as np
from groq import Groq
from langchain_text_splitters import RecursiveCharacterTextSplitter
# NOTE: only SentenceTransformer is a top-level export; importing
# `Transformer` from the package root raises ImportError (it lives in
# sentence_transformers.models), so it is removed here.
from sentence_transformers import SentenceTransformer
from youtube_transcript_api import YouTubeTranscriptApi

# ===============================
# CONFIGURATION
# ===============================
# Load Groq API Key from the environment (Hugging Face Secrets).
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# Client is None when the key is missing; callers must check before use.
groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None

# Shared embedding model, used for both chunk and query embeddings.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Module-level state holding the "brain" of the currently loaded video.
vector_store = None  # FAISS index over chunk embeddings
chunks_store = []    # raw text chunks, row-aligned with the index
# ===============================
# CORE FUNCTIONS
# ===============================
def extract_video_id(url):
    """Return the 11-character YouTube video ID found in *url*, or None.

    Recognizes standard watch URLs, youtu.be short links, embeds,
    shorts, and shared links.
    """
    pattern = r"(?:v=|\/|be\/|embed\/|shorts\/)([0-9A-Za-z_-]{11})"
    found = re.search(pattern, url)
    if found is None:
        return None
    return found.group(1)
def get_transcript(url):
    """Fetch the full transcript text for a YouTube video URL.

    Returns the transcript as a single space-joined string, or a string
    starting with "ERROR:" describing the failure (invalid URL, no
    transcript available, network problem, ...).
    """
    video_id = extract_video_id(url)
    if video_id is None:
        return "ERROR: Invalid YouTube URL."
    try:
        # Static helper on the imported class fetches the caption track.
        segments = YouTubeTranscriptApi.get_transcript(video_id)
        return " ".join(segment['text'] for segment in segments)
    except Exception as e:
        return f"ERROR: Could not retrieve transcript. (Details: {str(e)})"
def build_vector_index(text):
    """Chunk *text*, embed the chunks, and build a FAISS L2 index.

    Populates the module-level `vector_store` (FAISS index) and
    `chunks_store` (chunk texts, row-aligned with the index).
    """
    global vector_store, chunks_store

    # Split the transcript into overlapping chunks for retrieval.
    splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=60)
    chunks_store = splitter.split_text(text)

    # Embed every chunk with the shared sentence-transformer model;
    # FAISS requires float32 input.
    vectors = np.array(embedding_model.encode(chunks_store)).astype('float32')

    # Flat (exact) L2 index sized to the embedding dimensionality.
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    vector_store = index
def get_ai_response(user_query):
    """Answer *user_query* via RAG over the indexed video transcript.

    Retrieves the top-3 most similar transcript chunks from the FAISS
    index and asks Groq's Llama model to answer using that context.
    Always returns a user-facing string (answer or error message).
    """
    if vector_store is None or not chunks_store:
        return "Please load a video first."
    # Fix: without this guard, a missing API key leaves groq_client as
    # None and the call below raised AttributeError, surfacing as the
    # cryptic "AI Error: 'NoneType' object has no attribute 'chat'".
    if groq_client is None:
        return "AI Error: Groq API key is not configured."

    # Embed the query and retrieve the 3 nearest chunks.
    query_embedding = embedding_model.encode([user_query])
    D, I = vector_store.search(np.array(query_embedding).astype('float32'), k=3)
    # FAISS pads missing neighbors with -1 when fewer than k vectors exist.
    context = "\n".join([chunks_store[i] for i in I[0] if i != -1])

    prompt = f"""Use the following video transcript context to answer the question.
If the answer isn't in the context, say you don't know based on the video.
Context: {context}
Question: {user_query}
Answer:"""
    try:
        completion = groq_client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[{"role": "user", "content": prompt}]
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"AI Error: {str(e)}"
# ===============================
# UI LOGIC
# ===============================
def process_video_step(url):
    """Gradio handler: fetch a transcript, index it, and report status.

    Returns (transcript preview, status message) for the Setup tab.
    """
    transcript = get_transcript(url)
    # get_transcript signals failure via an "ERROR"-prefixed string.
    if transcript.startswith("ERROR"):
        return transcript, "❌ Failed"
    build_vector_index(transcript)
    preview_text = transcript[:1000] + "..."
    return preview_text, "✅ Video Indexed! Go to Chat tab."
def chat_step(message, history):
    """Gradio handler: run one chat turn and clear the input textbox.

    Appends a (user message, reply) pair to *history* and returns the
    updated history plus an empty string to reset the input field.
    """
    if GROQ_API_KEY:
        reply = get_ai_response(message)
    else:
        reply = "Error: Groq API Key missing in Secrets."
    history.append((message, reply))
    return history, ""
# ===============================
# GRADIO INTERFACE
# ===============================
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📺 YouTube AI Expert (RAG)")
    with gr.Tabs():
        # Tab 1: paste a URL and build the transcript index.
        with gr.Tab("1. Setup Video"):
            video_url = gr.Textbox(label="YouTube URL", placeholder="https://www.youtube.com/watch?v=...")
            index_button = gr.Button("Process Video", variant="primary")
            status_box = gr.Textbox(label="Status")
            preview_box = gr.Textbox(label="Transcript Preview (First 1000 chars)", lines=5)
            index_button.click(process_video_step, inputs=video_url, outputs=[preview_box, status_box])
        # Tab 2: chat against the indexed transcript.
        with gr.Tab("2. Chat with Video"):
            chat_window = gr.Chatbot(height=400)
            question_box = gr.Textbox(label="Ask anything about the video...")
            gr.ClearButton([question_box, chat_window])
            # Submitting a question updates the chat and clears the box.
            question_box.submit(chat_step, [question_box, chat_window], [chat_window, question_box])

if __name__ == "__main__":
    demo.launch()