Spaces:
Runtime error
Runtime error
| import os | |
| import re | |
| import gradio as gr | |
| import numpy as np | |
| import faiss | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
| from sentence_transformers import Transformer, SentenceTransformer | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from groq import Groq | |
# ===============================
# CONFIGURATION
# ===============================
# The Groq API key is read from the GROQ_API_KEY environment variable
# (provided through Hugging Face Secrets). No client is created without it.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if GROQ_API_KEY:
    groq_client = Groq(api_key=GROQ_API_KEY)
else:
    groq_client = None

# Embedding model shared by indexing and querying.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Module-level state holding the "brain" of the currently loaded video:
# the FAISS index and the text chunks its rows refer to.
vector_store = None
chunks_store = []
| # =============================== | |
| # CORE FUNCTIONS | |
| # =============================== | |
def extract_video_id(url):
    """Extract the 11-character YouTube video ID from a URL.

    Recognises the ``v=`` query parameter as well as IDs that directly follow
    a ``/`` (share links, ``/embed/`` and ``/shorts/`` paths).

    Args:
        url: The URL text to inspect.

    Returns:
        The 11-character video ID, or ``None`` when ``url`` is not a string
        or contains no ID-shaped token.
    """
    if not isinstance(url, str):
        return None
    # An ID is exactly 11 characters. The negative lookahead rejects the
    # first 11 characters of a longer path segment (e.g. "/subscriptions").
    # "be/", "embed/" and "shorts/" all end in "/", so the bare "/"
    # alternative already covers them.
    pattern = r"(?:v=|/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])"
    match = re.search(pattern, url)
    return match.group(1) if match else None
def get_transcript(url):
    """Fetch the transcript of a YouTube video as a single string.

    Args:
        url: A YouTube URL from which the video ID is extracted.

    Returns:
        The transcript segments joined with spaces. On any failure (invalid
        URL, retrieval error, empty transcript) a message starting with
        ``"ERROR"`` is returned instead of raising.
    """
    video_id = extract_video_id(url)
    if not video_id:
        return "ERROR: Invalid YouTube URL."
    try:
        if hasattr(YouTubeTranscriptApi, "fetch"):
            # youtube-transcript-api >= 1.0 exposes an instance-based API;
            # the static get_transcript() was removed in later releases.
            segments = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
        else:
            # Older releases only provide the static helper.
            segments = YouTubeTranscriptApi.get_transcript(video_id)
        text = " ".join(segment["text"] for segment in segments)
    except Exception as e:
        # Deliberately broad: every failure is reported to the UI as text.
        return f"ERROR: Could not retrieve transcript. (Details: {str(e)})"
    if not text.strip():
        # An empty transcript cannot be chunked or indexed downstream.
        return "ERROR: Could not retrieve transcript. (Details: transcript is empty)"
    return text
def build_vector_index(text):
    """Chunk ``text``, embed the chunks, and store them in a FAISS index.

    On success the module-level ``vector_store`` and ``chunks_store`` are
    replaced together. On failure both keep their previous values, so the
    index and the chunk list never get out of step.

    Args:
        text: The transcript text to index.

    Raises:
        ValueError: If ``text`` is empty or yields no chunks.
    """
    global vector_store, chunks_store

    if not text or not text.strip():
        raise ValueError("Cannot build a vector index from empty text.")

    # 1. Chunking
    splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=60)
    chunks = splitter.split_text(text)
    if not chunks:
        raise ValueError("Cannot build a vector index: text produced no chunks.")

    # 2. Embedding (FAISS is fed a contiguous float32 matrix)
    embeddings = np.ascontiguousarray(
        embedding_model.encode(chunks), dtype=np.float32
    )

    # 3. Indexing with FAISS
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)

    # Publish both pieces of state only after every step has succeeded.
    chunks_store = chunks
    vector_store = index
def get_ai_response(user_query):
    """Answer a question using the indexed transcript as context.

    Retrieves up to three of the nearest transcript chunks for the query and
    sends them, together with the question, to the Groq chat completion API.

    Args:
        user_query: The user's question.

    Returns:
        The model's answer, or a human-readable message when the query is
        blank, no video is indexed, the Groq client is not configured, or
        the API call fails.
    """
    if not user_query or not user_query.strip():
        return "Please enter a question."
    if vector_store is None or not chunks_store:
        return "Please load a video first."
    if groq_client is None:
        return "Error: Groq API Key missing in Secrets."

    # Search for relevant chunks; never ask for more neighbours than exist.
    top_k = min(3, len(chunks_store))
    query_embedding = np.ascontiguousarray(
        embedding_model.encode([user_query]), dtype=np.float32
    )
    _, indices = vector_store.search(query_embedding, k=top_k)
    # The bounds check drops -1 placeholders and any stale index.
    context = "\n".join(
        chunks_store[i] for i in indices[0] if 0 <= i < len(chunks_store)
    )

    prompt = (
        "Use the following video transcript context to answer the question.\n"
        "If the answer isn't in the context, say you don't know based on the video.\n"
        f"Context: {context}\n"
        f"Question: {user_query}\n"
        "Answer:"
    )
    try:
        completion = groq_client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[{"role": "user", "content": prompt}],
        )
        return completion.choices[0].message.content
    except Exception as e:
        # Deliberately broad: API failures are shown in the chat as text.
        return f"AI Error: {str(e)}"
| # =============================== | |
| # UI LOGIC | |
| # =============================== | |
def process_video_step(url):
    """Fetch a video's transcript and index it for the chat tab.

    Args:
        url: The YouTube URL entered by the user.

    Returns:
        A ``(preview, status)`` tuple. ``preview`` is at most the first 1000
        characters of the transcript (with "..." appended only when it was
        truncated), or an error message; ``status`` is a short
        success/failure label.
    """
    if not url or not url.strip():
        return "ERROR: Invalid YouTube URL.", "❌ Failed"

    transcript = get_transcript(url)
    if transcript.startswith("ERROR"):
        return transcript, "❌ Failed"

    try:
        build_vector_index(transcript)
    except Exception as e:
        # Report indexing problems in the UI rather than crashing the event.
        return f"ERROR: Could not index transcript. (Details: {e})", "❌ Failed"

    if len(transcript) > 1000:
        preview_text = transcript[:1000] + "..."
    else:
        preview_text = transcript
    return preview_text, "✅ Video Indexed! Go to Chat tab."
def chat_step(message, history):
    """Handle one chat submission and return the updated conversation.

    Args:
        message: The text the user submitted.
        history: The chat history as a list of ``(user, bot)`` tuples;
            ``None`` is treated as an empty history.

    Returns:
        A ``(history, "")`` tuple: the updated history and an empty string
        used to clear the message box.
    """
    # Work on a copy so the caller's list is not mutated; tolerate None.
    history = list(history) if history else []

    if not message or not message.strip():
        # Nothing was asked: leave the conversation unchanged.
        return history, ""

    if not GROQ_API_KEY:
        history.append((message, "Error: Groq API Key missing in Secrets."))
        return history, ""

    history.append((message, get_ai_response(message)))
    return history, ""
# ===============================
# GRADIO INTERFACE
# ===============================
# NOTE(review): nesting below is reconstructed from a flattened source;
# confirm it matches the intended layout.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📺 YouTube AI Expert (RAG)")

    with gr.Tabs():
        # Tab 1: load a video and build its index.
        with gr.Tab("1. Setup Video"):
            url_input = gr.Textbox(
                label="YouTube URL",
                placeholder="https://www.youtube.com/watch?v=...",
            )
            process_btn = gr.Button("Process Video", variant="primary")
            status = gr.Textbox(label="Status")
            preview = gr.Textbox(
                label="Transcript Preview (First 1000 chars)",
                lines=5,
            )
            process_btn.click(
                fn=process_video_step,
                inputs=url_input,
                outputs=[preview, status],
            )

        # Tab 2: ask questions about the indexed video.
        with gr.Tab("2. Chat with Video"):
            chatbot = gr.Chatbot(height=400)
            msg = gr.Textbox(label="Ask anything about the video...")
            clear = gr.ClearButton([msg, chatbot])
            msg.submit(
                fn=chat_step,
                inputs=[msg, chatbot],
                outputs=[chatbot, msg],
            )

if __name__ == "__main__":
    demo.launch()