Spaces:

agnixcode
/

youtube-rag-chat

Runtime error

App Files Community

agnixcode committed on Apr 22

Commit

670c1c5

verified ·

1 Parent(s): 01c13a2

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -779

app.py CHANGED Viewed

@@ -3,848 +3,132 @@ import re
 import gradio as gr
 import numpy as np
 import faiss
-# Import the library
 from youtube_transcript_api import YouTubeTranscriptApi
 from sentence_transformers import SentenceTransformer
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from groq import Groq
 # ===============================
-# CONFIG & INITIALIZATION
 # ===============================
-# Get API Key from Environment Variables (Set this in HF Space Secrets)
 GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None
-# Load embedding model
 embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
-# Global Storage
 vector_store = None
 chunks_store = []
 # ===============================
-# HELPER FUNCTIONS
 # ===============================
 def extract_video_id(url):
-    """Extracts the 11-character YouTube video ID from various URL formats."""
     regex = r"(?:v=|\/|be\/)([0-9A-Za-z_-]{11}).*"
     match = re.search(regex, url)
-    if match:
-        return match.group(1)
-    return None
 def get_transcript(url):
-    """
-    Fetch transcript using the correct static method.
-    """
-    try:
-        video_id = extract_video_id(url)
-        if not video_id:
-            return "ERROR: Invalid YouTube URL. Could not find Video ID."
-        # FIX: Calling the static method directly on the class
-        # We also try to fetch English by default or the first available
-        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
-        full_text = " ".join([item['text'] for item in transcript_list])
-        return full_text
-    except Exception as e:
-        return f"ERROR: Could not retrieve transcript. (Details: {str(e)})"
-def process_transcript(transcript):
-    global vector_store, chunks_store
-    # Split text into manageable chunks
-    splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=60)
-    chunks = splitter.split_text(transcript)
-    # Create embeddings
-    embeddings = embedding_model.encode(chunks)
-    # Initialize FAISS Index
-    dimension = embeddings.shape[1]
-    index = faiss.IndexFlatL2(dimension)
-    index.add(np.array(embeddings).astype('float32'))
-    # Store globally for retrieval
-    vector_store = index
-    chunks_store = chunks
-def retrieve_context(query, top_k=3):
-    if vector_store is None:
-        return ""
-    query_embedding = embedding_model.encode([query])
-    distances, indices = vector_store.search(np.array(query_embedding).astype('float32'), top_k)
-    # Fetch matching chunks
-    retrieved_chunks = [chunks_store[i] for i in indices[0] if i != -1]
-    return "\n\n".join(retrieved_chunks)
-def generate_answer(query):
-    if not groq_client:
-        return "Error: Groq API Key is not set in Hugging Face Secrets."
-    context = retrieve_context(query)
-    if not context:
-        return "I don't have any context from the video yet. Please process a video first."
-    prompt = f"""
-You are a professional AI Assistant. Use the provided context from a YouTube video to answer the user's question.
-If the answer isn't in the context, say you don't know based on the video.
-Context:
-{context}
-Question:
-{query}
-Answer:
-"""
-    response = groq_client.chat.completions.create(
-        model="llama-3.3-70b-versatile",
-        messages=[{"role": "user", "content": prompt}]
-    )
-    return response.choices[0].message.content
-# ===============================
-# UI LOGIC
-# ===============================
-def process_video_ui(url):
-    if not url:
-        return "Please enter a valid URL", "❌ No URL"
-    transcript = get_transcript(url)
-    if transcript.startswith("ERROR"):
-        return transcript, "❌ Failed to fetch transcript"
-    process_transcript(transcript)
-    return transcript[:1500] + "...", "✅ Video processed! You can now chat."
-def chat_with_video_ui(user_query, history):
-    if not user_query:
-        return history, ""
-    if vector_store is None:
-        history.append((user_query, "⚠️ Please process a video in the first tab before chatting."))
-        return history, ""
-    answer = generate_answer(user_query)
-    history.append((user_query, answer))
-    return history, ""
-# ===============================
-# GRADIO INTERFACE
-# ===============================
-with gr.Blocks(theme=gr.themes.Soft()) as app:
-    gr.Markdown("# 🎥 YouTube RAG AI Expert")
-    gr.Markdown("Transcribe any YouTube video and chat with its content using Llama 3.3 & FAISS.")
-    with gr.Tabs():
-        with gr.Tab("1. Load Video"):
-            url_input = gr.Textbox(label="YouTube Link", placeholder="https://www.youtube.com/watch?v=...")
-            process_btn = gr.Button("Transcribe & Index Video", variant="primary")
-            with gr.Row():
-                status_output = gr.Textbox(label="Status")
-                transcript_preview = gr.Textbox(label="Transcript Preview", lines=8)
-            process_btn.click(process_video_ui, inputs=url_input, outputs=[transcript_preview, status_output])
-        with gr.Tab("2. Chat with AI"):
-            chatbot = gr.Chatbot(height=500)
-            with gr.Row():
-                msg = gr.Textbox(label="Your Question", placeholder="What are the key takeaways?", scale=4)
-                submit = gr.Button("Ask", variant="primary", scale=1)
-            submit.click(chat_with_video_ui, inputs=[msg, chatbot], outputs=[chatbot, msg])
-            msg.submit(chat_with_video_ui, inputs=[msg, chatbot], outputs=[chatbot, msg])
-if __name__ == "__main__":
-    app.launch()
-import os
-import re
-import gradio as gr
-import numpy as np
-import faiss
-# Import the library
-from youtube_transcript_api import YouTubeTranscriptApi
-from sentence_transformers import SentenceTransformer
-from langchain_text_splitters import RecursiveCharacterTextSplitter
-from groq import Groq
-# ===============================
-# CONFIG & INITIALIZATION
-# ===============================
-# Get API Key from Environment Variables (Set this in HF Space Secrets)
-GROQ_API_KEY = os.getenv("GROQ_API_KEY")
-groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None
-# Load embedding model
-embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
-# Global Storage
-vector_store = None
-chunks_store = []
-# ===============================
-# HELPER FUNCTIONS
-# ===============================
-def extract_video_id(url):
-    """Extracts the 11-character YouTube video ID from various URL formats."""
-    regex = r"(?:v=|\/|be\/)([0-9A-Za-z_-]{11}).*"
-    match = re.search(regex, url)
-    if match:
-        return match.group(1)
-    return None
-def get_transcript(url):
-    """
-    Fetch transcript using the correct static method.
-    """
     try:
-        video_id = extract_video_id(url)
-        if not video_id:
-            return "ERROR: Invalid YouTube URL. Could not find Video ID."
-        # FIX: Calling the static method directly on the class
-        # We also try to fetch English by default or the first available
-        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
-        full_text = " ".join([item['text'] for item in transcript_list])
-        return full_text
     except Exception as e:
-        return f"ERROR: Could not retrieve transcript. (Details: {str(e)})"
-def process_transcript(transcript):
     global vector_store, chunks_store
-    # Split text into manageable chunks
     splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=60)
-    chunks = splitter.split_text(transcript)
-    # Create embeddings
-    embeddings = embedding_model.encode(chunks)
-    # Initialize FAISS Index
     dimension = embeddings.shape[1]
     index = faiss.IndexFlatL2(dimension)
     index.add(np.array(embeddings).astype('float32'))
-    # Store globally for retrieval
     vector_store = index
-    chunks_store = chunks
-def retrieve_context(query, top_k=3):
-    if vector_store is None:
-        return ""
-    query_embedding = embedding_model.encode([query])
-    distances, indices = vector_store.search(np.array(query_embedding).astype('float32'), top_k)
-    # Fetch matching chunks
-    retrieved_chunks = [chunks_store[i] for i in indices[0] if i != -1]
-    return "\n\n".join(retrieved_chunks)
-def generate_answer(query):
-    if not groq_client:
-        return "Error: Groq API Key is not set in Hugging Face Secrets."
-    context = retrieve_context(query)
-    if not context:
-        return "I don't have any context from the video yet. Please process a video first."
-    prompt = f"""
-You are a professional AI Assistant. Use the provided context from a YouTube video to answer the user's question.
-If the answer isn't in the context, say you don't know based on the video.
-Context:
-{context}
-Question:
-{query}
-Answer:
-"""
-    response = groq_client.chat.completions.create(
-        model="llama-3.3-70b-versatile",
-        messages=[{"role": "user", "content": prompt}]
-    )
-    return response.choices[0].message.content
-# ===============================
-# UI LOGIC
-# ===============================
-def process_video_ui(url):
-    if not url:
-        return "Please enter a valid URL", "❌ No URL"
-    transcript = get_transcript(url)
-    if transcript.startswith("ERROR"):
-        return transcript, "❌ Failed to fetch transcript"
-    process_transcript(transcript)
-    return transcript[:1500] + "...", "✅ Video processed! You can now chat."
-def chat_with_video_ui(user_query, history):
-    if not user_query:
-        return history, ""
-    if vector_store is None:
-        history.append((user_query, "⚠️ Please process a video in the first tab before chatting."))
-        return history, ""
-    answer = generate_answer(user_query)
-    history.append((user_query, answer))
-    return history, ""
-# ===============================
-# GRADIO INTERFACE
-# ===============================
-with gr.Blocks(theme=gr.themes.Soft()) as app:
-    gr.Markdown("# 🎥 YouTube RAG AI Expert")
-    gr.Markdown("Transcribe any YouTube video and chat with its content using Llama 3.3 & FAISS.")
-    with gr.Tabs():
-        with gr.Tab("1. Load Video"):
-            url_input = gr.Textbox(label="YouTube Link", placeholder="https://www.youtube.com/watch?v=...")
-            process_btn = gr.Button("Transcribe & Index Video", variant="primary")
-            with gr.Row():
-                status_output = gr.Textbox(label="Status")
-                transcript_preview = gr.Textbox(label="Transcript Preview", lines=8)
-            process_btn.click(process_video_ui, inputs=url_input, outputs=[transcript_preview, status_output])
-        with gr.Tab("2. Chat with AI"):
-            chatbot = gr.Chatbot(height=500)
-            with gr.Row():
-                msg = gr.Textbox(label="Your Question", placeholder="What are the key takeaways?", scale=4)
-                submit = gr.Button("Ask", variant="primary", scale=1)
-            submit.click(chat_with_video_ui, inputs=[msg, chatbot], outputs=[chatbot, msg])
-            msg.submit(chat_with_video_ui, inputs=[msg, chatbot], outputs=[chatbot, msg])
-if __name__ == "__main__":
-    app.launch()
-import os
-import re
-import gradio as gr
-import numpy as np
-import faiss
-# Import the library
-from youtube_transcript_api import YouTubeTranscriptApi
-from sentence_transformers import SentenceTransformer
-from langchain_text_splitters import RecursiveCharacterTextSplitter
-from groq import Groq
-# ===============================
-# CONFIG & INITIALIZATION
-# ===============================
-# Get API Key from Environment Variables (Set this in HF Space Secrets)
-GROQ_API_KEY = os.getenv("GROQ_API_KEY")
-groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None
-# Load embedding model
-embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
-# Global Storage
-vector_store = None
-chunks_store = []
-# ===============================
-# HELPER FUNCTIONS
-# ===============================
-def extract_video_id(url):
-    """Extracts the 11-character YouTube video ID from various URL formats."""
-    regex = r"(?:v=|\/|be\/)([0-9A-Za-z_-]{11}).*"
-    match = re.search(regex, url)
-    if match:
-        return match.group(1)
-    return None
-def get_transcript(url):
-    """
-    Fetch transcript using the correct static method.
-    """
     try:
-        video_id = extract_video_id(url)
-        if not video_id:
-            return "ERROR: Invalid YouTube URL. Could not find Video ID."
-        # FIX: Calling the static method directly on the class
-        # We also try to fetch English by default or the first available
-        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
-        full_text = " ".join([item['text'] for item in transcript_list])
-        return full_text
     except Exception as e:
-        return f"ERROR: Could not retrieve transcript. (Details: {str(e)})"
-def process_transcript(transcript):
-    global vector_store, chunks_store
-    # Split text into manageable chunks
-    splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=60)
-    chunks = splitter.split_text(transcript)
-    # Create embeddings
-    embeddings = embedding_model.encode(chunks)
-    # Initialize FAISS Index
-    dimension = embeddings.shape[1]
-    index = faiss.IndexFlatL2(dimension)
-    index.add(np.array(embeddings).astype('float32'))
-    # Store globally for retrieval
-    vector_store = index
-    chunks_store = chunks
-def retrieve_context(query, top_k=3):
-    if vector_store is None:
-        return ""
-    query_embedding = embedding_model.encode([query])
-    distances, indices = vector_store.search(np.array(query_embedding).astype('float32'), top_k)
-    # Fetch matching chunks
-    retrieved_chunks = [chunks_store[i] for i in indices[0] if i != -1]
-    return "\n\n".join(retrieved_chunks)
-def generate_answer(query):
-    if not groq_client:
-        return "Error: Groq API Key is not set in Hugging Face Secrets."
-    context = retrieve_context(query)
-    if not context:
-        return "I don't have any context from the video yet. Please process a video first."
-    prompt = f"""
-You are a professional AI Assistant. Use the provided context from a YouTube video to answer the user's question.
-If the answer isn't in the context, say you don't know based on the video.
-Context:
-{context}
-Question:
-{query}
-Answer:
-"""
-    response = groq_client.chat.completions.create(
-        model="llama-3.3-70b-versatile",
-        messages=[{"role": "user", "content": prompt}]
-    )
-    return response.choices[0].message.content
 # ===============================
 # UI LOGIC
 # ===============================
-def process_video_ui(url):
-    if not url:
-        return "Please enter a valid URL", "❌ No URL"
-    transcript = get_transcript(url)
-    if transcript.startswith("ERROR"):
-        return transcript, "❌ Failed to fetch transcript"
-    process_transcript(transcript)
-    return transcript[:1500] + "...", "✅ Video processed! You can now chat."
-def chat_with_video_ui(user_query, history):
-    if not user_query:
-        return history, ""
-    if vector_store is None:
-        history.append((user_query, "⚠️ Please process a video in the first tab before chatting."))
-        return history, ""
-    answer = generate_answer(user_query)
-    history.append((user_query, answer))
-    return history, ""
-# ===============================
-# GRADIO INTERFACE
-# ===============================
-with gr.Blocks(theme=gr.themes.Soft()) as app:
-    gr.Markdown("# 🎥 YouTube RAG AI Expert")
-    gr.Markdown("Transcribe any YouTube video and chat with its content using Llama 3.3 & FAISS.")
-    with gr.Tabs():
-        with gr.Tab("1. Load Video"):
-            url_input = gr.Textbox(label="YouTube Link", placeholder="https://www.youtube.com/watch?v=...")
-            process_btn = gr.Button("Transcribe & Index Video", variant="primary")
-            with gr.Row():
-                status_output = gr.Textbox(label="Status")
-                transcript_preview = gr.Textbox(label="Transcript Preview", lines=8)
-            process_btn.click(process_video_ui, inputs=url_input, outputs=[transcript_preview, status_output])
-        with gr.Tab("2. Chat with AI"):
-            chatbot = gr.Chatbot(height=500)
-            with gr.Row():
-                msg = gr.Textbox(label="Your Question", placeholder="What are the key takeaways?", scale=4)
-                submit = gr.Button("Ask", variant="primary", scale=1)
-            submit.click(chat_with_video_ui, inputs=[msg, chatbot], outputs=[chatbot, msg])
-            msg.submit(chat_with_video_ui, inputs=[msg, chatbot], outputs=[chatbot, msg])
-if __name__ == "__main__":
-    app.launch()
-import os
-import re
-import gradio as gr
-import numpy as np
-import faiss
-# Import the library
-from youtube_transcript_api import YouTubeTranscriptApi
-from sentence_transformers import SentenceTransformer
-from langchain_text_splitters import RecursiveCharacterTextSplitter
-from groq import Groq
-# ===============================
-# CONFIG & INITIALIZATION
-# ===============================
-# Get API Key from Environment Variables (Set this in HF Space Secrets)
-GROQ_API_KEY = os.getenv("GROQ_API_KEY")
-groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None
-# Load embedding model
-embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
-# Global Storage
-vector_store = None
-chunks_store = []
-# ===============================
-# HELPER FUNCTIONS
-# ===============================
-def extract_video_id(url):
-    """Extracts the 11-character YouTube video ID from various URL formats."""
-    regex = r"(?:v=|\/|be\/)([0-9A-Za-z_-]{11}).*"
-    match = re.search(regex, url)
-    if match:
-        return match.group(1)
-    return None
-def get_transcript(url):
-    """
-    Fetch transcript using the correct static method.
-    """
-    try:
-        video_id = extract_video_id(url)
-        if not video_id:
-            return "ERROR: Invalid YouTube URL. Could not find Video ID."
-        # FIX: Calling the static method directly on the class
-        # We also try to fetch English by default or the first available
-        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
-        full_text = " ".join([item['text'] for item in transcript_list])
-        return full_text
-    except Exception as e:
-        return f"ERROR: Could not retrieve transcript. (Details: {str(e)})"
-def process_transcript(transcript):
-    global vector_store, chunks_store
-    # Split text into manageable chunks
-    splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=60)
-    chunks = splitter.split_text(transcript)
-    # Create embeddings
-    embeddings = embedding_model.encode(chunks)
-    # Initialize FAISS Index
-    dimension = embeddings.shape[1]
-    index = faiss.IndexFlatL2(dimension)
-    index.add(np.array(embeddings).astype('float32'))
-    # Store globally for retrieval
-    vector_store = index
-    chunks_store = chunks
-def retrieve_context(query, top_k=3):
-    if vector_store is None:
-        return ""
-    query_embedding = embedding_model.encode([query])
-    distances, indices = vector_store.search(np.array(query_embedding).astype('float32'), top_k)
-    # Fetch matching chunks
-    retrieved_chunks = [chunks_store[i] for i in indices[0] if i != -1]
-    return "\n\n".join(retrieved_chunks)
-def generate_answer(query):
-    if not groq_client:
-        return "Error: Groq API Key is not set in Hugging Face Secrets."
-    context = retrieve_context(query)
-    if not context:
-        return "I don't have any context from the video yet. Please process a video first."
-    prompt = f"""
-You are a professional AI Assistant. Use the provided context from a YouTube video to answer the user's question.
-If the answer isn't in the context, say you don't know based on the video.
-Context:
-{context}
-Question:
-{query}
-Answer:
-"""
-    response = groq_client.chat.completions.create(
-        model="llama-3.3-70b-versatile",
-        messages=[{"role": "user", "content": prompt}]
-    )
-    return response.choices[0].message.content
-# ===============================
-# UI LOGIC
-# ===============================
-def process_video_ui(url):
-    if not url:
-        return "Please enter a valid URL", "❌ No URL"
     transcript = get_transcript(url)
     if transcript.startswith("ERROR"):
-        return transcript, "❌ Failed to fetch transcript"
-    process_transcript(transcript)
-    return transcript[:1500] + "...", "✅ Video processed! You can now chat."
-def chat_with_video_ui(user_query, history):
-    if not user_query:
-        return history, ""
-    if vector_store is None:
-        history.append((user_query, "⚠️ Please process a video in the first tab before chatting."))
-        return history, ""
-    answer = generate_answer(user_query)
-    history.append((user_query, answer))
-    return history, ""
-# ===============================
-# GRADIO INTERFACE
-# ===============================
-with gr.Blocks(theme=gr.themes.Soft()) as app:
-    gr.Markdown("# 🎥 YouTube RAG AI Expert")
-    gr.Markdown("Transcribe any YouTube video and chat with its content using Llama 3.3 & FAISS.")
-    with gr.Tabs():
-        with gr.Tab("1. Load Video"):
-            url_input = gr.Textbox(label="YouTube Link", placeholder="https://www.youtube.com/watch?v=...")
-            process_btn = gr.Button("Transcribe & Index Video", variant="primary")
-            with gr.Row():
-                status_output = gr.Textbox(label="Status")
-                transcript_preview = gr.Textbox(label="Transcript Preview", lines=8)
-            process_btn.click(process_video_ui, inputs=url_input, outputs=[transcript_preview, status_output])
-        with gr.Tab("2. Chat with AI"):
-            chatbot = gr.Chatbot(height=500)
-            with gr.Row():
-                msg = gr.Textbox(label="Your Question", placeholder="What are the key takeaways?", scale=4)
-                submit = gr.Button("Ask", variant="primary", scale=1)
-            submit.click(chat_with_video_ui, inputs=[msg, chatbot], outputs=[chatbot, msg])
-            msg.submit(chat_with_video_ui, inputs=[msg, chatbot], outputs=[chatbot, msg])
-if __name__ == "__main__":
-    app.launch()
-import os
-import re
-import gradio as gr
-import numpy as np
-import faiss
-# Import the library
-from youtube_transcript_api import YouTubeTranscriptApi
-from sentence_transformers import SentenceTransformer
-from langchain_text_splitters import RecursiveCharacterTextSplitter
-from groq import Groq
-# ===============================
-# CONFIG & INITIALIZATION
-# ===============================
-# Get API Key from Environment Variables (Set this in HF Space Secrets)
-GROQ_API_KEY = os.getenv("GROQ_API_KEY")
-groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None
-# Load embedding model
-embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
-# Global Storage
-vector_store = None
-chunks_store = []
-# ===============================
-# HELPER FUNCTIONS
-# ===============================
-def extract_video_id(url):
-    """Extracts the 11-character YouTube video ID from various URL formats."""
-    regex = r"(?:v=|\/|be\/)([0-9A-Za-z_-]{11}).*"
-    match = re.search(regex, url)
-    if match:
-        return match.group(1)
-    return None
-def get_transcript(url):
-    """
-    Fetch transcript using the correct static method.
-    """
-    try:
-        video_id = extract_video_id(url)
-        if not video_id:
-            return "ERROR: Invalid YouTube URL. Could not find Video ID."
-        # FIX: Calling the static method directly on the class
-        # We also try to fetch English by default or the first available
-        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
-        full_text = " ".join([item['text'] for item in transcript_list])
-        return full_text
-    except Exception as e:
-        return f"ERROR: Could not retrieve transcript. (Details: {str(e)})"
-def process_transcript(transcript):
-    global vector_store, chunks_store
-    # Split text into manageable chunks
-    splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=60)
-    chunks = splitter.split_text(transcript)
-    # Create embeddings
-    embeddings = embedding_model.encode(chunks)
-    # Initialize FAISS Index
-    dimension = embeddings.shape[1]
-    index = faiss.IndexFlatL2(dimension)
-    index.add(np.array(embeddings).astype('float32'))
-    # Store globally for retrieval
-    vector_store = index
-    chunks_store = chunks
-def retrieve_context(query, top_k=3):
-    if vector_store is None:
-        return ""
-    query_embedding = embedding_model.encode([query])
-    distances, indices = vector_store.search(np.array(query_embedding).astype('float32'), top_k)
-    # Fetch matching chunks
-    retrieved_chunks = [chunks_store[i] for i in indices[0] if i != -1]
-    return "\n\n".join(retrieved_chunks)
-def generate_answer(query):
-    if not groq_client:
-        return "Error: Groq API Key is not set in Hugging Face Secrets."
-    context = retrieve_context(query)
-    if not context:
-        return "I don't have any context from the video yet. Please process a video first."
-    prompt = f"""
-You are a professional AI Assistant. Use the provided context from a YouTube video to answer the user's question.
-If the answer isn't in the context, say you don't know based on the video.
-Context:
-{context}
-Question:
-{query}
-Answer:
-"""
-    response = groq_client.chat.completions.create(
-        model="llama-3.3-70b-versatile",
-        messages=[{"role": "user", "content": prompt}]
-    )
-    return response.choices[0].message.content
-# ===============================
-# UI LOGIC
-# ===============================
-def process_video_ui(url):
-    if not url:
-        return "Please enter a valid URL", "❌ No URL"
-    transcript = get_transcript(url)
-    if transcript.startswith("ERROR"):
-        return transcript, "❌ Failed to fetch transcript"
-    process_transcript(transcript)
-    return transcript[:1500] + "...", "✅ Video processed! You can now chat."
-def chat_with_video_ui(user_query, history):
-    if not user_query:
-        return history, ""
-    if vector_store is None:
-        history.append((user_query, "⚠️ Please process a video in the first tab before chatting."))
-        return history, ""
-    answer = generate_answer(user_query)
-    history.append((user_query, answer))
     return history, ""
 # ===============================
 # GRADIO INTERFACE
 # ===============================
-with gr.Blocks(theme=gr.themes.Soft()) as app:
-    gr.Markdown("# 🎥 YouTube RAG AI Expert")
-    gr.Markdown("Transcribe any YouTube video and chat with its content using Llama 3.3 & FAISS.")
     with gr.Tabs():
-        with gr.Tab("1. Load Video"):
-            url_input = gr.Textbox(label="YouTube Link", placeholder="https://www.youtube.com/watch?v=...")
-            process_btn = gr.Button("Transcribe & Index Video", variant="primary")
-            with gr.Row():
-                status_output = gr.Textbox(label="Status")
-                transcript_preview = gr.Textbox(label="Transcript Preview", lines=8)
-            process_btn.click(process_video_ui, inputs=url_input, outputs=[transcript_preview, status_output])
-        with gr.Tab("2. Chat with AI"):
-            chatbot = gr.Chatbot(height=500)
-            with gr.Row():
-                msg = gr.Textbox(label="Your Question", placeholder="What are the key takeaways?", scale=4)
-                submit = gr.Button("Ask", variant="primary", scale=1)
-            submit.click(chat_with_video_ui, inputs=[msg, chatbot], outputs=[chatbot, msg])
-            msg.submit(chat_with_video_ui, inputs=[msg, chatbot], outputs=[chatbot, msg])
 if __name__ == "__main__":
-    app.launch()

 import gradio as gr
 import numpy as np
 import faiss
 from youtube_transcript_api import YouTubeTranscriptApi
 from sentence_transformers import SentenceTransformer
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from groq import Groq
 # ===============================
+# CONFIGURATION
 # ===============================
+# Load Groq API Key from environment variables (Hugging Face Secrets)
 GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None
+# Load embedding model (runs on CPU in HF Spaces)
 embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
+# Global state for the currently loaded video: the FAISS index and the text chunks it was built from
 vector_store = None
 chunks_store = []
 # ===============================
+# CORE FUNCTIONS
 # ===============================
 def extract_video_id(url):
+    """Extracts the 11-character YouTube video ID."""
     regex = r"(?:v=|\/|be\/)([0-9A-Za-z_-]{11}).*"
     match = re.search(regex, url)
+    return match.group(1) if match else None
 def get_transcript(url):
+    """Fetches transcript and handles potential library errors."""
+    video_id = extract_video_id(url)
+    if not video_id:
+        return "ERROR: Invalid YouTube URL."
     try:
+        # Correct static method call on the YouTubeTranscriptApi class
+        transcript_data = YouTubeTranscriptApi.get_transcript(video_id)
+        return " ".join([item['text'] for item in transcript_data])
     except Exception as e:
+        return f"ERROR: {str(e)}"
+def build_vector_index(text):
+    """Chunks text and stores it in a FAISS vector database."""
     global vector_store, chunks_store
+    # 1. Chunking
     splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=60)
+    chunks_store = splitter.split_text(text)
+    # 2. Embedding
+    embeddings = embedding_model.encode(chunks_store)
+    # 3. Indexing with FAISS
     dimension = embeddings.shape[1]
     index = faiss.IndexFlatL2(dimension)
     index.add(np.array(embeddings).astype('float32'))
     vector_store = index
+def get_ai_response(user_query):
+    """Retrieves context and asks Groq Llama 3."""
+    if vector_store is None or not chunks_store:
+        return "Please load a video first."
+    # Search for relevant chunks
+    query_embedding = embedding_model.encode([user_query])
+    D, I = vector_store.search(np.array(query_embedding).astype('float32'), k=3)
+    context = "\n".join([chunks_store[i] for i in I[0] if i != -1])
+    prompt = f"""Use the following video transcript context to answer the question.
+    Context: {context}
+    Question: {user_query}
+    Answer:"""
     try:
+        completion = groq_client.chat.completions.create(
+            model="llama-3.3-70b-versatile",
+            messages=[{"role": "user", "content": prompt}]
+        )
+        return completion.choices[0].message.content
     except Exception as e:
+        return f"AI Error: {str(e)}"
 # ===============================
 # UI LOGIC
 # ===============================
+def process_video_step(url):
     transcript = get_transcript(url)
     if transcript.startswith("ERROR"):
+        return transcript, "❌ Failed"
+    build_vector_index(transcript)
+    return transcript[:1000] + "...", "✅ Video Indexed! Go to Chat tab."
+def chat_step(message, history):
+    if not GROQ_API_KEY:
+        return history + [("Error", "Groq API Key missing in Secrets.")], ""
+    answer = get_ai_response(message)
+    history.append((message, answer))
     return history, ""
 # ===============================
 # GRADIO INTERFACE
 # ===============================
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 📺 YouTube AI Expert (RAG)")
     with gr.Tabs():
+        with gr.Tab("1. Setup Video"):
+            url_input = gr.Textbox(label="YouTube URL", placeholder="https://www.youtube.com/watch?v=...")
+            process_btn = gr.Button("Process Video", variant="primary")
+            status = gr.Textbox(label="Status")
+            preview = gr.Textbox(label="Transcript Preview (First 1000 chars)", lines=5)
+            process_btn.click(process_video_step, inputs=url_input, outputs=[preview, status])
+        with gr.Tab("2. Chat with Video"):
+            chatbot = gr.Chatbot(height=400)
+            msg = gr.Textbox(label="Ask anything about the video...")
+            clear = gr.ClearButton([msg, chatbot])
+            msg.submit(chat_step, [msg, chatbot], [chatbot, msg])
 if __name__ == "__main__":
+    demo.launch()