# Source captured from a Hugging Face Space (Space status at capture: "Runtime error").
# File size: 4,709 bytes.
import os
import re
import gradio as gr
import numpy as np
import faiss
from youtube_transcript_api import YouTubeTranscriptApi
from sentence_transformers import Transformer, SentenceTransformer
from langchain_text_splitters import RecursiveCharacterTextSplitter
from groq import Groq
# ===============================
# CONFIGURATION
# ===============================
# Read the Groq API key from the environment (set via Hugging Face Secrets).
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Only construct a client when a key is available; otherwise keep None so
# the rest of the module can detect the missing configuration.
if GROQ_API_KEY:
    groq_client = Groq(api_key=GROQ_API_KEY)
else:
    groq_client = None

# Sentence-embedding model, loaded once when the module is imported.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Module-level state describing the currently indexed video:
# the FAISS index and the text chunks it was built from.
vector_store = None
chunks_store = []
# ===============================
# CORE FUNCTIONS
# ===============================
def extract_video_id(url):
    """Extract the 11-character YouTube video ID from a URL.

    Recognises ``v=`` query parameters, ``youtu.be/`` share links,
    ``/embed/`` and ``/shorts/`` paths, and any run of exactly 11 ID
    characters that directly follows a slash.

    Args:
        url: The URL (or arbitrary text) to search. Empty or non-string
            values are treated as "no ID found" instead of raising.

    Returns:
        The video ID as a string, or ``None`` when no ID is found.
    """
    if not isinstance(url, str) or not url:
        return None
    # The trailing negative lookahead forces the ID to stop after exactly 11
    # characters. Without it the bare "/" alternative matches the first 11
    # characters of any longer segment (for example the host in
    # "https://youtube-nocookie.com/..." or a 24-character channel ID) and a
    # bogus video ID is returned.
    regex = r"(?:v=|\/|be\/|embed\/|shorts\/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])"
    match = re.search(regex, url)
    return match.group(1) if match else None
def get_transcript(url):
    """Fetch the transcript of a YouTube video as one plain-text string.

    Args:
        url: A YouTube URL from which ``extract_video_id`` can pull an ID.

    Returns:
        The transcript segments joined by single spaces. On failure a
        message starting with ``"ERROR:"`` is returned instead of raising.
    """
    video_id = extract_video_id(url)
    if not video_id:
        return "ERROR: Invalid YouTube URL."
    try:
        if hasattr(YouTubeTranscriptApi, "fetch"):
            # Newer youtube-transcript-api releases: instance-based API whose
            # snippets expose their text as an attribute. The old static
            # helper is deprecated/removed there.
            snippets = YouTubeTranscriptApi().fetch(video_id)
            pieces = [snippet.text for snippet in snippets]
        else:
            # Legacy releases: static helper returning a list of dicts.
            segments = YouTubeTranscriptApi.get_transcript(video_id)
            pieces = [segment["text"] for segment in segments]
    except Exception as e:
        # Deliberately broad: any failure (no captions, blocked request,
        # network error) is reported to the UI as text rather than raised.
        return f"ERROR: Could not retrieve transcript. (Details: {str(e)})"
    return " ".join(pieces)
def build_vector_index(text):
    """Chunk *text*, embed the chunks and index them with FAISS.

    The result is published through the module-level ``vector_store``
    (the FAISS index) and ``chunks_store`` (the list of chunk strings).

    Args:
        text: Full transcript text to index. Empty or whitespace-only text
            clears the stored index instead of raising.

    Returns:
        None.
    """
    global vector_store, chunks_store

    # 1. Chunking
    chunks = []
    if text and text.strip():
        splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=60)
        chunks = splitter.split_text(text)
    if not chunks:
        # Nothing to embed, so there is no embedding dimension to size an
        # index with. Reset the state so queries report "no video loaded".
        vector_store = None
        chunks_store = []
        return

    # 2. Embedding (FAISS expects float32 vectors)
    embeddings = np.asarray(embedding_model.encode(chunks), dtype="float32")

    # 3. Indexing with FAISS
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)

    # Publish both globals together, and only after every step succeeded, so
    # a failure above never leaves new chunks paired with an old index.
    chunks_store = chunks
    vector_store = index
def get_ai_response(user_query):
    """Answer *user_query* from the indexed transcript using Groq.

    The three transcript chunks closest to the question are retrieved from
    the module-level FAISS index and sent to the chat model as context.

    Args:
        user_query: The question to answer.

    Returns:
        The model's answer, or a human-readable message when no video is
        indexed, the Groq client is not configured, or the API call fails.
    """
    if vector_store is None or not chunks_store:
        return "Please load a video first."
    if groq_client is None:
        # Without this guard the call below is reported as the opaque
        # "AI Error: 'NoneType' object has no attribute 'chat'".
        return "Error: Groq API Key missing in Secrets."

    # Search for relevant chunks
    query_embedding = np.asarray(embedding_model.encode([user_query]), dtype="float32")
    _distances, indices = vector_store.search(query_embedding, k=3)
    # Skip the -1 placeholders returned when fewer than k vectors are
    # indexed, and any index that does not map to a stored chunk.
    context = "\n".join(
        chunks_store[i] for i in indices[0] if 0 <= i < len(chunks_store)
    )

    prompt = (
        "Use the following video transcript context to answer the question.\n"
        "If the answer isn't in the context, say you don't know based on the video.\n"
        f"Context: {context}\n"
        f"Question: {user_query}\n"
        "Answer:"
    )
    try:
        completion = groq_client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[{"role": "user", "content": prompt}],
        )
        return completion.choices[0].message.content
    except Exception as e:
        # Deliberately broad: API failures are shown in the chat, not raised.
        return f"AI Error: {str(e)}"
# ===============================
# UI LOGIC
# ===============================
def process_video_step(url):
    """Fetch and index the transcript for *url*.

    Args:
        url: YouTube URL entered by the user.

    Returns:
        A ``(preview, status)`` tuple: the first 1000 characters of the
        transcript (or an error message) and a short status string.
    """
    if not url or not url.strip():
        # Same outcome get_transcript gives for an unusable URL, but without
        # crashing when the value is None.
        return "ERROR: Invalid YouTube URL.", "❌ Failed"
    transcript = get_transcript(url)
    if transcript.startswith("ERROR"):
        return transcript, "❌ Failed"
    if not transcript.strip():
        # An empty transcript cannot be indexed; report it instead of
        # failing inside the index builder.
        return "ERROR: Transcript is empty.", "❌ Failed"
    build_vector_index(transcript)
    preview = transcript[:1000]
    if len(transcript) > 1000:
        # Only mark the preview as truncated when something was cut off.
        preview += "..."
    return preview, "✅ Video Indexed! Go to Chat tab."
def chat_step(message, history):
    """Handle one chat submission.

    Args:
        message: Text the user submitted.
        history: Chat history as a list of ``(user, bot)`` tuples; ``None``
            is treated as an empty history.

    Returns:
        A ``(history, "")`` tuple: the updated history and an empty string
        that clears the input box.
    """
    if history is None:
        history = []
    if not message or not message.strip():
        # Nothing was asked: leave the conversation unchanged rather than
        # sending an empty question to the model.
        return history, ""
    if not GROQ_API_KEY:
        history.append((message, "Error: Groq API Key missing in Secrets."))
        return history, ""
    history.append((message, get_ai_response(message)))
    return history, ""
# ===============================
# GRADIO INTERFACE
# ===============================
# Two-tab layout: the first tab loads and indexes a video, the second chats
# against the indexed transcript.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📺 YouTube AI Expert (RAG)")

    with gr.Tabs():
        with gr.Tab("1. Setup Video"):
            url_input = gr.Textbox(
                label="YouTube URL",
                placeholder="https://www.youtube.com/watch?v=...",
            )
            process_btn = gr.Button("Process Video", variant="primary")
            status = gr.Textbox(label="Status")
            preview = gr.Textbox(
                label="Transcript Preview (First 1000 chars)",
                lines=5,
            )
            # process_video_step returns (preview text, status text).
            process_btn.click(
                fn=process_video_step,
                inputs=url_input,
                outputs=[preview, status],
            )

        with gr.Tab("2. Chat with Video"):
            chatbot = gr.Chatbot(height=400)
            msg = gr.Textbox(label="Ask anything about the video...")
            clear = gr.ClearButton([msg, chatbot])
            # chat_step returns (updated history, new textbox value).
            msg.submit(
                fn=chat_step,
                inputs=[msg, chatbot],
                outputs=[chatbot, msg],
            )

if __name__ == "__main__":
    demo.launch()
|