Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| from urllib.parse import urlparse, parse_qs | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
| from groq import Groq | |
# ── Setup ──────────────────────────────────────────────
# Vector store for the currently loaded video; stays None until a URL has
# been loaded successfully. Module-level, reassigned by the load handler.
vectorstore = None

# API key for Groq; set this in the HF Space Secrets.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)

# Sentence-embedding model used to index and query transcript chunks.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# ── URL Parsing ────────────────────────────────────────
def extract_video_id(url):
    """Return the video ID embedded in a YouTube URL.

    Supported URL shapes:
      - https://www.youtube.com/watch?v=ID
      - https://youtu.be/ID
      - https://www.youtube.com/live/ID
      - https://www.youtube.com/shorts/ID
      - https://www.youtube.com/embed/ID

    Surrounding whitespace is ignored, and scheme-less input such as
    ``youtu.be/ID`` or ``www.youtube.com/watch?v=ID`` is accepted as well.

    Args:
        url: The URL text as entered by the user.

    Returns:
        The video ID as a non-empty string.

    Raises:
        ValueError: If no non-empty video ID can be found in ``url``.
    """
    url = url.strip()
    parsed = urlparse(url)
    if not parsed.netloc:
        # urlparse() only recognises a host when the URL has a scheme (or a
        # leading "//"); without one, "youtu.be/ID" ends up entirely in .path.
        parsed = urlparse(f"https://{url}")

    # .hostname is lower-cased and has any port stripped.
    host = parsed.hostname or ""
    segments = [segment for segment in parsed.path.split("/") if segment]

    video_id = ""
    if host in ("youtu.be", "www.youtu.be"):
        # youtu.be/ID -- only the first path segment is the ID, so a trailing
        # slash or extra segments do not leak into the result.
        if segments:
            video_id = segments[0]
    else:
        query = parse_qs(parsed.query)
        if "v" in query:
            # /watch?v=ID
            video_id = query["v"][0]
        elif len(segments) >= 2 and segments[0] in ("live", "shorts", "embed"):
            # /live/ID, /shorts/ID or /embed/ID
            video_id = segments[1]

    if not video_id:
        raise ValueError(
            f"Could not extract video ID from: {url}\n"
            "Supported formats: /watch?v=ID, youtu.be/ID, /live/ID, /shorts/ID, /embed/ID"
        )
    return video_id
# ── Core Functions ─────────────────────────────────────
def get_transcript(video_url):
    """Fetch a video's transcript and flatten it into one string.

    The video ID is taken from ``video_url`` via ``extract_video_id``; the
    ``.text`` of every fetched transcript entry is then joined with single
    spaces.

    Errors raised while parsing the URL or fetching the transcript are not
    caught here.
    """
    video_id = extract_video_id(video_url)
    snippets = YouTubeTranscriptApi().fetch(video_id)
    return " ".join(snippet.text for snippet in snippets)
def build_vectorstore(text):
    """Split transcript text into overlapping chunks and index them with FAISS.

    Args:
        text: The full transcript as a single string.

    Returns:
        A FAISS vector store built from the chunks, embedded with the
        module-level ``embeddings`` model.

    Raises:
        ValueError: If ``text`` is empty or whitespace-only, or if splitting
            it produces no chunks.
    """
    # Fail early with a readable message: building an index from zero
    # documents otherwise surfaces as an unhelpful low-level error.
    if not text or not text.strip():
        raise ValueError("Transcript is empty; there is nothing to index.")

    # 500-character chunks with a 50-character overlap between neighbours.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.create_documents([text])
    if not chunks:
        raise ValueError("Transcript produced no text chunks; there is nothing to index.")
    return FAISS.from_documents(chunks, embeddings)
def answer_query(vs, query):
    """Answer ``query`` using the transcript chunks most similar to it.

    The three best matches from ``vs.similarity_search`` are joined into a
    context string, placed in the system prompt, and sent together with the
    user's question to the module-level Groq ``client``. The content of the
    first returned choice is the result.
    """
    hits = vs.similarity_search(query, k=3)
    context = "\n\n".join(hit.page_content for hit in hits)

    # The system prompt restricts the model to the retrieved context.
    system_prompt = (
        "You are a helpful assistant that answers questions\n"
        "based ONLY on the provided video transcript context.\n"
        "If the answer is not in the context, say 'This topic was not covered in the video.'\n"
        "Context:\n"
        f"{context}"
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query},
    ]

    completion = client.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=messages,
    )
    return completion.choices[0].message.content
# ── Gradio Handlers ────────────────────────────────────
def load_video(url):
    """Gradio click handler: fetch a video's transcript and index it.

    On success the module-level ``vectorstore`` is replaced with an index of
    the new transcript. On empty input or failure it is left unchanged, so a
    previously loaded video stays available to the chat.

    Args:
        url: Text from the URL textbox.

    Returns:
        A ``(status message, component update)`` pair. The update is applied
        to the Load button, so it always keeps the button interactive: the
        user must be able to retry after an empty input or a failed load.
    """
    global vectorstore

    if not url or not url.strip():
        return "⚠️ Please enter a YouTube URL.", gr.update(interactive=True)

    try:
        transcript = get_transcript(url)
        vectorstore = build_vectorstore(transcript)
    except Exception as e:
        # UI boundary: report any failure in the status box instead of
        # letting it propagate out of the handler.
        return f"❌ Error: {e}", gr.update(interactive=True)

    return "✅ Video loaded! You can now ask questions below.", gr.update(interactive=True)
def chat(query, history):
    """Chat callback: answer ``query`` from the currently loaded video.

    Args:
        query: The user's message.
        history: Prior conversation supplied by the chat UI; not used.

    Returns:
        The model's answer, or a short notice when no video is loaded, the
        question is blank, or answering fails.
    """
    if vectorstore is None:
        return "⚠️ Please load a YouTube video first using the URL field above."
    if not query or not query.strip():
        return "Please enter a question."
    try:
        return answer_query(vectorstore, query)
    except Exception as e:
        # UI boundary: report the failure in the chat, in the same format
        # the video-loading handler uses, rather than raising into Gradio.
        return f"❌ Error: {e}"
# ── UI ─────────────────────────────────────────────────
# Page layout: a URL row with a Load button, a read-only status box, and a
# chat panel that answers questions about the loaded video.
with gr.Blocks(title="YouTube RAG Chatbot") as app:
    gr.Markdown("# 🎥 YouTube RAG Chatbot\nPaste any YouTube URL and ask questions about the video!")

    with gr.Row():
        url_input = gr.Textbox(
            placeholder="https://www.youtube.com/watch?v=...",
            label="YouTube URL",
            scale=4,
        )
        load_btn = gr.Button("▶ Load Video", variant="primary", scale=1)

    status_box = gr.Textbox(label="Status", interactive=False)

    # load_video returns (status text, button update); the second value is
    # applied to the Load button itself.
    load_btn.click(
        fn=load_video,
        inputs=[url_input],
        outputs=[status_box, load_btn],
    )

    # NOTE(review): these example prompts look tailored to one particular
    # talk; consider video-agnostic examples.
    gr.ChatInterface(
        fn=chat,
        examples=[
            "What is neuroplasticity?",
            "How can we change our brain?",
            "What role does behavior play in learning?",
            "What did the speaker say about stroke patients?",
        ],
        title="",
    )

# Launch only when run as a script, so importing this module has no server
# side effect.
if __name__ == "__main__":
    app.launch()  # No share=True needed on HF Spaces