"""Gradio app: load a YouTube transcript, index it with FAISS, chat with Groq.

Flow:
    1. "Setup Video" tab: fetch the transcript for a URL, split it into
       chunks, embed the chunks and store them in an in-memory FAISS index.
    2. "Chat with Video" tab: embed the question, retrieve the closest
       chunks and send them as context to a Groq-hosted chat model.
"""
import os
import re

import faiss
import gradio as gr
import numpy as np
from groq import Groq
from langchain_text_splitters import RecursiveCharacterTextSplitter
# NOTE(review): `Transformer` is not referenced anywhere in this file; confirm
# the installed sentence-transformers version exports it at the top level, or
# drop it from this import.
from sentence_transformers import Transformer, SentenceTransformer
from youtube_transcript_api import YouTubeTranscriptApi

# ===============================
# CONFIGURATION
# ===============================

# Load Groq API Key from Hugging Face Secrets
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None

# Model identifiers and retrieval tuning knobs.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
GROQ_MODEL_NAME = "llama-3.3-70b-versatile"
CHUNK_SIZE = 600
CHUNK_OVERLAP = 60
TOP_K = 3
PREVIEW_CHARS = 1000

# Load embedding model
embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)

# Global variables to store the "brain" of the current video.
# NOTE: these are module-level, so every caller of this process reads and
# overwrites the same index and chunk list.
vector_store = None
chunks_store = []

# A video ID is exactly 11 characters from [0-9A-Za-z_-]. The trailing
# negative lookahead rejects the first 11 characters of a longer token (for
# example the host in "//youtube-nocookie.com/..." or a channel ID).
_VIDEO_ID_RE = re.compile(
    r"(?:v=|\/|be\/|embed\/|shorts\/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])"
)


# ===============================
# CORE FUNCTIONS
# ===============================

def extract_video_id(url):
    """Extract the 11-character YouTube video ID from *url*.

    Handles standard watch URLs, shorts, embeds and shared (youtu.be) links.

    Args:
        url: The URL text entered by the user.

    Returns:
        The video ID string, or ``None`` when *url* is not a string or no
        ID-shaped token is found.
    """
    if not isinstance(url, str):
        return None
    match = _VIDEO_ID_RE.search(url.strip())
    return match.group(1) if match else None


def _fetch_transcript_text(video_id):
    """Return the transcript of *video_id* as one space-joined string.

    Supports both generations of youtube-transcript-api: the older static
    ``get_transcript`` (list of dicts with a ``"text"`` key) and the newer
    instance ``fetch`` (iterable of snippet objects with a ``.text``
    attribute). Any exception raised by the library propagates to the caller.
    """
    legacy_fetch = getattr(YouTubeTranscriptApi, "get_transcript", None)
    if legacy_fetch is not None:
        return " ".join(entry["text"] for entry in legacy_fetch(video_id))
    fetched = YouTubeTranscriptApi().fetch(video_id)
    return " ".join(snippet.text for snippet in fetched)


def get_transcript(url):
    """Fetch the transcript text for the YouTube video at *url*.

    Args:
        url: A YouTube URL in any form ``extract_video_id`` understands.

    Returns:
        The transcript as a single string on success. On failure, a message
        starting with ``"ERROR"`` (callers test for that prefix).
    """
    video_id = extract_video_id(url)
    if not video_id:
        return "ERROR: Invalid YouTube URL."

    try:
        text = _fetch_transcript_text(video_id)
    except Exception as e:
        return f"ERROR: Could not retrieve transcript. (Details: {str(e)})"

    # An empty transcript cannot be chunked or embedded; report it here
    # instead of letting indexing fail later.
    if not text.strip():
        return "ERROR: Transcript is empty."
    return text


def build_vector_index(text):
    """Chunk *text*, embed the chunks and store them in a FAISS index.

    On success the module-level ``vector_store`` and ``chunks_store`` are
    replaced together. On failure both keep their previous values, so the
    index and the chunk list never get out of step.

    Args:
        text: The full transcript text.

    Raises:
        ValueError: If splitting *text* yields no chunks.
    """
    global vector_store, chunks_store

    # 1. Chunking
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
    )
    chunks = splitter.split_text(text)
    if not chunks:
        raise ValueError("Transcript produced no text chunks to index.")

    # 2. Embedding (FAISS expects a contiguous float32 matrix)
    embeddings = np.ascontiguousarray(
        embedding_model.encode(chunks), dtype="float32"
    )

    # 3. Indexing with FAISS
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)

    # Publish only after every step succeeded.
    chunks_store = chunks
    vector_store = index


def get_ai_response(user_query):
    """Retrieve the most relevant chunks and ask the Groq model to answer.

    Args:
        user_query: The user's question about the loaded video.

    Returns:
        The model's answer, or a human-readable message when no video is
        loaded, the Groq client is not configured, or the API call fails.
    """
    if vector_store is None or not chunks_store:
        return "Please load a video first."
    if groq_client is None:
        return "Error: Groq API Key missing in Secrets."

    # Search for relevant chunks
    query_embedding = np.ascontiguousarray(
        embedding_model.encode([user_query]), dtype="float32"
    )
    neighbours = min(TOP_K, len(chunks_store))
    _distances, indices = vector_store.search(query_embedding, k=neighbours)
    # FAISS pads missing results with -1; skip those slots.
    context = "\n".join(chunks_store[i] for i in indices[0] if i != -1)

    prompt = f"""Use the following video transcript context to answer the question.
If the answer isn't in the context, say you don't know based on the video.

Context:
{context}

Question: {user_query}

Answer:"""

    try:
        completion = groq_client.chat.completions.create(
            model=GROQ_MODEL_NAME,
            messages=[{"role": "user", "content": prompt}],
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"AI Error: {str(e)}"


# ===============================
# UI LOGIC
# ===============================

def process_video_step(url):
    """Fetch and index the video at *url* for the "Setup Video" tab.

    Returns:
        A ``(preview_text, status_text)`` pair. On failure the first element
        carries the error message and the status is the failure marker.
    """
    transcript = get_transcript(url)
    if transcript.startswith("ERROR"):
        return transcript, "❌ Failed"

    try:
        build_vector_index(transcript)
    except Exception as e:
        # UI boundary: show the problem in the preview box instead of raising.
        return f"ERROR: Could not index transcript. (Details: {e})", "❌ Failed"

    preview = transcript[:PREVIEW_CHARS]
    if len(transcript) > PREVIEW_CHARS:
        preview += "..."
    return preview, "✅ Video Indexed! Go to Chat tab."


def chat_step(message, history):
    """Handle one chat submission for the "Chat with Video" tab.

    Args:
        message: The text the user submitted.
        history: The chatbot's list of ``(user, assistant)`` pairs; ``None``
            is treated as an empty history.

    Returns:
        ``(updated_history, "")`` where the empty string clears the input box.
    """
    history = history or []
    # Ignore blank submissions rather than sending an empty question.
    if not message or not message.strip():
        return history, ""

    if not GROQ_API_KEY:
        history.append((message, "Error: Groq API Key missing in Secrets."))
        return history, ""

    answer = get_ai_response(message)
    history.append((message, answer))
    return history, ""


# ===============================
# GRADIO INTERFACE
# ===============================

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📺 YouTube AI Expert (RAG)")

    with gr.Tabs():
        with gr.Tab("1. Setup Video"):
            url_input = gr.Textbox(
                label="YouTube URL",
                placeholder="https://www.youtube.com/watch?v=...",
            )
            process_btn = gr.Button("Process Video", variant="primary")
            status = gr.Textbox(label="Status")
            preview = gr.Textbox(
                label="Transcript Preview (First 1000 chars)", lines=5
            )

            process_btn.click(
                process_video_step,
                inputs=url_input,
                outputs=[preview, status],
            )

        with gr.Tab("2. Chat with Video"):
            chatbot = gr.Chatbot(height=400)
            msg = gr.Textbox(label="Ask anything about the video...")
            clear = gr.ClearButton([msg, chatbot])

            msg.submit(chat_step, [msg, chatbot], [chatbot, msg])

if __name__ == "__main__":
    demo.launch()