Spaces:

agnixcode
/

youtube_chatbot_transcriber

Sleeping

App Files Files Community

agnixcode committed on Apr 22

Commit

e34ac27

verified ·

1 Parent(s): 1d8e642

Update app.py

Browse files

Files changed (1) hide show

app.py +105 -61

app.py CHANGED Viewed

@@ -6,7 +6,12 @@ import gradio as gr
 import numpy as np
 import faiss
-from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from sentence_transformers import SentenceTransformer
 from huggingface_hub import InferenceClient
@@ -22,7 +27,6 @@ full_transcript = ""
 HF_TOKEN = os.environ.get("HF_TOKEN", "")
 LLM_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
 inference_client = InferenceClient(model=LLM_MODEL, token=HF_TOKEN or None)
 # ---------------------------------------------------------------------------
@@ -39,30 +43,55 @@ def _extract_video_id(url: str) -> str:
         match = re.search(pattern, url)
         if match:
             return match.group(1)
-    raise ValueError(f"Could not extract a valid video ID from URL: {url}")
 # ---------------------------------------------------------------------------
 # 1. Fetch transcript
 # ---------------------------------------------------------------------------
 def get_transcript(url: str) -> str:
     video_id = _extract_video_id(url)
     try:
-        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
     except TranscriptsDisabled:
         raise ValueError("Transcripts are disabled for this video.")
-    except NoTranscriptFound:
-        try:
-            transcript_list = (
-                YouTubeTranscriptApi.list_transcripts(video_id)
-                .find_generated_transcript(["en", "en-US", "en-GB"])
-                .fetch()
-            )
-        except Exception as inner_exc:
-            raise ValueError(f"No transcript found. Details: {inner_exc}")
     except Exception as exc:
-        raise ValueError(f"Failed to retrieve transcript: {exc}")
-    return " ".join(seg["text"] for seg in transcript_list)
 # ---------------------------------------------------------------------------
 # 2. Process video
@@ -74,10 +103,18 @@ def process_video(url: str):
     chunk_store = []
     full_transcript = ""
     try:
         transcript = get_transcript(url)
     except ValueError as exc:
-        return str(exc), ""
     full_transcript = transcript
@@ -88,7 +125,7 @@ def process_video(url: str):
     )
     chunks = splitter.split_text(transcript)
     if not chunks:
-        return "Transcript was fetched but produced no text chunks.", transcript
     chunk_store = chunks
@@ -105,7 +142,7 @@ def process_video(url: str):
         f"   • Chunks created : {len(chunks)}\n"
         f"   • Embedding dim  : {dim}\n"
         f"   • FAISS vectors  : {index.ntotal}\n\n"
-        f"Switch to the Chat with Video tab to start asking questions."
     )
     return status, transcript
@@ -120,9 +157,9 @@ def retrieve_context(query: str, top_k: int = 3) -> str:
     query_vec = np.array(query_vec, dtype="float32")
     k = min(top_k, len(chunk_store))
-    distances, indices = faiss_index.search(query_vec, k)
-    retrieved = [chunk_store[i] for i in indices[0] if i < len(chunk_store)]
     return "\n\n".join(retrieved)
 # ---------------------------------------------------------------------------
@@ -131,20 +168,20 @@ def retrieve_context(query: str, top_k: int = 3) -> str:
 def generate_answer(query: str) -> str:
     if faiss_index is None:
         return (
-            "⚠️ No video has been processed yet. "
-            "Please go to the Process Video tab and load a YouTube URL first."
         )
     context = retrieve_context(query, top_k=3)
     if not context:
-        return "⚠️ Could not retrieve any relevant context for your question."
     system_prompt = (
-        "You are a helpful assistant that answers questions strictly based on "
-        "the provided transcript context. "
-        "If the answer is not contained in the context, say: "
         "'I could not find this information in the video transcript.' "
-        "Do NOT make up information."
     )
     user_prompt = (
@@ -154,57 +191,53 @@ def generate_answer(query: str) -> str:
         f"Answer:"
     )
-    messages = [
-        {"role": "system", "content": system_prompt},
-        {"role": "user",   "content": user_prompt},
-    ]
     try:
         response = inference_client.chat_completion(
-            messages=messages,
             max_tokens=512,
             temperature=0.2,
             top_p=0.9,
         )
-        answer = response.choices[0].message.content.strip()
     except Exception as exc:
-        answer = (
-            f"❌ Model inference failed: {exc}\n\n"
-            "Make sure HF_TOKEN is set and the model endpoint is available."
         )
-    return answer
 # ---------------------------------------------------------------------------
 # 5. Chat helper
 # ---------------------------------------------------------------------------
 def chat(user_message: str, history: list):
     if not user_message.strip():
-        history.append([user_message, "Please enter a question."])
         return history, ""
     answer = generate_answer(user_message)
-    history.append([user_message, answer])
     return history, ""
 # ---------------------------------------------------------------------------
-# 6. Gradio UI  –  fully Gradio 6.0 compatible (no deprecated args)
 # ---------------------------------------------------------------------------
 with gr.Blocks(title="YouTube RAG Chatbot") as app:
     gr.Markdown(
         """
         # 🎬 YouTube RAG Chatbot
-        **Process any YouTube video and chat with its transcript using RAG.**
-        > Set your `HF_TOKEN` as a Space secret for LLM inference to work.
         """
     )
     with gr.Tabs():
-        # ── Tab 1 ──────────────────────────────────────────────────────────
         with gr.TabItem("📥 Process Video"):
-            gr.Markdown("Paste a YouTube URL and click **Process**.")
             with gr.Row():
                 url_input = gr.Textbox(
@@ -220,7 +253,7 @@ with gr.Blocks(title="YouTube RAG Chatbot") as app:
                 interactive=False,
             )
             transcript_output = gr.Textbox(
-                label="Transcript (read-only)",
                 lines=15,
                 interactive=False,
             )
@@ -231,42 +264,53 @@ with gr.Blocks(title="YouTube RAG Chatbot") as app:
                 outputs=[status_output, transcript_output],
             )
-        # ── Tab 2 ──────────────────────────────────────────────────────────
         with gr.TabItem("💬 Chat with Video"):
-            gr.Markdown("Ask any question about the processed video.")
-            chatbot = gr.Chatbot(
-                label="Conversation",
-                height=450,
-            )
             with gr.Row():
                 query_input = gr.Textbox(
                     label="Your question",
-                    placeholder="What is the main topic discussed in this video?",
                     scale=5,
                 )
                 send_btn = gr.Button("Send 🚀", variant="primary", scale=1)
-            clear_btn = gr.Button("🗑️ Clear conversation", variant="secondary")
             chat_history = gr.State([])
             send_btn.click(
                 fn=chat,
                 inputs=[query_input, chat_history],
                 outputs=[chatbot, query_input],
-            ).then(fn=lambda h: h, inputs=[chatbot], outputs=[chat_history])
             query_input.submit(
                 fn=chat,
                 inputs=[query_input, chat_history],
                 outputs=[chatbot, query_input],
-            ).then(fn=lambda h: h, inputs=[chatbot], outputs=[chat_history])
-            clear_btn.click(fn=lambda: ([], []), outputs=[chatbot, chat_history])
 # ---------------------------------------------------------------------------
-# Launch
 # ---------------------------------------------------------------------------
 if __name__ == "__main__":
     app.launch()

 import numpy as np
 import faiss
+from youtube_transcript_api import (
+    YouTubeTranscriptApi,
+    TranscriptsDisabled,
+    NoTranscriptFound,
+    VideoUnavailable,
+)
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from sentence_transformers import SentenceTransformer
 from huggingface_hub import InferenceClient
 HF_TOKEN = os.environ.get("HF_TOKEN", "")
 LLM_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
 inference_client = InferenceClient(model=LLM_MODEL, token=HF_TOKEN or None)
 # ---------------------------------------------------------------------------
         match = re.search(pattern, url)
         if match:
             return match.group(1)
+    raise ValueError(f"Could not extract a valid video ID from: {url}")
 # ---------------------------------------------------------------------------
 # 1. Fetch transcript
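+#    Relies on the class-method API of youtube-transcript-api releases before 1.0
+#    (YouTubeTranscriptApi.get_transcript / list_transcripts).
+#    NOTE(review): later releases moved to an instance-based API — confirm the
+#    version pinned for this Space.
+#    In the pre-1.0 API, get_transcript() returns a list of dicts:
+#    [{"text": str, "start": float, "duration": float}],
+#    so the text is read with snippet["text"], not snippet.text.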
 # ---------------------------------------------------------------------------
 def get_transcript(url: str) -> str:
     video_id = _extract_video_id(url)
+    # Primary: try English directly
     try:
+        snippets = YouTubeTranscriptApi.get_transcript(
+            video_id, languages=["en", "en-US", "en-GB"]
+        )
+        return " ".join(s["text"] for s in snippets)
+    except (NoTranscriptFound, TranscriptsDisabled):
+        pass
+    except VideoUnavailable:
+        raise ValueError("This video is unavailable or private.")
+    except Exception:
+        pass
+    # Fallback: list all, pick first available, fetch it
+    try:
+        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
+        transcript = None
+        # prefer any english variant
+        for t in transcript_list:
+            if t.language_code.startswith("en"):
+                transcript = t
+                break
+        # if no english, take the first one
+        if transcript is None:
+            for t in transcript_list:
+                transcript = t
+                break
+        if transcript is None:
+            raise ValueError("No transcripts are available for this video.")
+        # fetch() returns list of dicts [{"text":..., "start":..., "duration":...}]
+        snippets = transcript.fetch()
+        return " ".join(s["text"] for s in snippets)
+    except ValueError:
+        raise
     except TranscriptsDisabled:
         raise ValueError("Transcripts are disabled for this video.")
     except Exception as exc:
+        raise ValueError(f"Could not retrieve transcript: {exc}")
 # ---------------------------------------------------------------------------
 # 2. Process video
     chunk_store = []
     full_transcript = ""
+    if not url.strip():
+        return "⚠️ Please enter a YouTube URL.", ""
     try:
         transcript = get_transcript(url)
     except ValueError as exc:
+        return f"❌ {exc}", ""
+    except Exception as exc:
+        return f"❌ Unexpected error: {exc}", ""
+    if not transcript.strip():
+        return "❌ Transcript is empty for this video.", ""
     full_transcript = transcript
     )
     chunks = splitter.split_text(transcript)
     if not chunks:
+        return "❌ Could not split transcript into chunks.", transcript
     chunk_store = chunks
         f"   • Chunks created : {len(chunks)}\n"
         f"   • Embedding dim  : {dim}\n"
         f"   • FAISS vectors  : {index.ntotal}\n\n"
+        f"Switch to the 💬 Chat with Video tab to ask questions."
     )
     return status, transcript
     query_vec = np.array(query_vec, dtype="float32")
     k = min(top_k, len(chunk_store))
+    _, indices = faiss_index.search(query_vec, k)
+    retrieved = [chunk_store[i] for i in indices[0] if 0 <= i < len(chunk_store)]
     return "\n\n".join(retrieved)
 # ---------------------------------------------------------------------------
 def generate_answer(query: str) -> str:
     if faiss_index is None:
         return (
+            "⚠️ No video processed yet. "
+            "Go to 📥 Process Video tab first."
         )
     context = retrieve_context(query, top_k=3)
     if not context:
+        return "⚠️ Could not retrieve relevant context for your question."
     system_prompt = (
+        "You are a helpful assistant that answers questions strictly "
+        "based on the provided video transcript context. "
+        "If the answer is not in the context, say: "
         "'I could not find this information in the video transcript.' "
+        "Do NOT hallucinate or make up information."
     )
     user_prompt = (
         f"Answer:"
     )
     try:
         response = inference_client.chat_completion(
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user",   "content": user_prompt},
+            ],
             max_tokens=512,
             temperature=0.2,
             top_p=0.9,
         )
+        return response.choices[0].message.content.strip()
     except Exception as exc:
+        return (
+            f"❌ Inference failed: {exc}\n"
+            "Check that HF_TOKEN is set correctly as a Space secret."
         )
 # ---------------------------------------------------------------------------
 # 5. Chat helper
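+#    Builds the history as a list of [user, bot] pairs (list of lists).
+#    NOTE(review): recent Gradio releases expect role/content message dicts for
+#    gr.Chatbot instead of pairs — confirm against the installed Gradio version.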
 # ---------------------------------------------------------------------------
 def chat(user_message: str, history: list):
     if not user_message.strip():
+        history = history + [["", "⚠️ Please enter a question."]]
         return history, ""
     answer = generate_answer(user_message)
+    history = history + [[user_message, answer]]
     return history, ""
 # ---------------------------------------------------------------------------
+# 6. Gradio UI — fully compatible with Gradio 6.13
 # ---------------------------------------------------------------------------
 with gr.Blocks(title="YouTube RAG Chatbot") as app:
     gr.Markdown(
         """
         # 🎬 YouTube RAG Chatbot
+        **Fetch any YouTube transcript and chat with it using RAG + Mistral-7B.**
+        > 🔑 Add your `HF_TOKEN` in Space **Settings → Secrets** for the LLM to work.
         """
     )
     with gr.Tabs():
+        # ── Tab 1: Process ─────────────────────────────────────────────────
         with gr.TabItem("📥 Process Video"):
+            gr.Markdown("Enter a YouTube URL and click **Process** to index the transcript.")
             with gr.Row():
                 url_input = gr.Textbox(
                 interactive=False,
             )
             transcript_output = gr.Textbox(
+                label="Transcript",
                 lines=15,
                 interactive=False,
             )
                 outputs=[status_output, transcript_output],
             )
+        # ── Tab 2: Chat ────────────────────────────────────────────────────
         with gr.TabItem("💬 Chat with Video"):
+            gr.Markdown("Ask questions about the video. Answers are grounded in the transcript.")
+            # chat() feeds this Chatbot a list of [user, bot] pairs.
+            # NOTE(review): confirm the installed Gradio version still accepts that format.
+            chatbot = gr.Chatbot(label="Conversation", height=450)
             with gr.Row():
                 query_input = gr.Textbox(
                     label="Your question",
+                    placeholder="What is the main topic of this video?",
                     scale=5,
                 )
                 send_btn = gr.Button("Send 🚀", variant="primary", scale=1)
+            clear_btn = gr.Button("🗑️ Clear", variant="secondary")
+            # gr.State stores the history list between interactions
             chat_history = gr.State([])
             send_btn.click(
                 fn=chat,
                 inputs=[query_input, chat_history],
                 outputs=[chatbot, query_input],
+            ).then(
+                fn=lambda h: h,
+                inputs=[chatbot],
+                outputs=[chat_history],
+            )
             query_input.submit(
                 fn=chat,
                 inputs=[query_input, chat_history],
                 outputs=[chatbot, query_input],
+            ).then(
+                fn=lambda h: h,
+                inputs=[chatbot],
+                outputs=[chat_history],
+            )
+            clear_btn.click(
+                fn=lambda: ([], []),
+                outputs=[chatbot, chat_history],
+            )
 # ---------------------------------------------------------------------------
+# Launch — in Gradio 6.x a theme is given as a launch() argument; none is passed here
 # ---------------------------------------------------------------------------
 if __name__ == "__main__":
     app.launch()