Spaces:

SafiUllahAdam
/

YouTube_Learning_Assistant

Sleeping

App Files Files Community

SafiUllahAdam committed on Oct 22, 2025

Commit

3c6ab71

verified ·

1 Parent(s): 0aa1e76

Fixed import and stabilized YouTube transcript extraction

Browse files

Ensured proper import of YouTubeTranscriptApi and simplified transcript fetching.
Resolves 'name not defined' and transcript retrieval errors.

Files changed (1) hide show

app.py +60 -50

app.py CHANGED Viewed

@@ -1,94 +1,104 @@
 import streamlit as st
-from youtube_transcript_api import YouTubeTranscriptApi as yta
-from transformers import pipeline
 import re
-# Helper functions
-def extract_video_id(url):
-    """Extract YouTube video ID from a full URL."""
     pattern = r"(?:v=|\/)([0-9A-Za-z_-]{11}).*"
     match = re.search(pattern, url)
     return match.group(1) if match else None
-def get_transcript(video_id):
-    """Fetch transcript text for a given video ID (compatible with all versions)."""
     try:
-        # Standard way — works in most versions
         transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
     except Exception as e:
-        # Fallback: try fetching manually from other available languages
-        try:
-            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
-            transcript = transcript_list.find_transcript(['en']).fetch()
-        except Exception:
-            raise Exception(f"Transcript not available or video has no subtitles. Details: {str(e)}")
-    text = " ".join([t["text"] for t in transcript])
-    return text
-def summarize_MEM_style(text):
-    """
-    Summarize transcript in MEM style:
-    simple, story-like, structured, and step-by-step.
-    """
     summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
-    # Prepare chunks to fit within model limits (~1024 tokens)
-    max_chunk = 1000
-    text_chunks = [text[i:i+max_chunk] for i in range(0, len(text), max_chunk)]
     summary = ""
-    for chunk in text_chunks:
-        # Add MEM-style instruction to the input prompt
         prompt = f"""
-        Summarize and explain this text using the MEM (Model Explanation Method):
         - Use simple, story-like language.
         - Explain step-by-step, as if teaching a beginner.
         - Focus on understanding, not technical detail.
-        - Keep the explanation calm, structured, and easy to remember.
         Text:
         {chunk}
         """
-        summary_part = summarizer(prompt, max_length=200, min_length=80, do_sample=False)[0]['summary_text']
-        summary += summary_part + " "
     return summary.strip()
 # Streamlit Interface
-st.set_page_config(page_title="🎥 YouTube Learning Assistant (MEM Style)", layout="centered")
-st.title("🎓 YouTube Learning Assistant (MEM Style)")
-st.markdown("Paste a **YouTube video link** below to generate its transcript and MEM-style explanation.")
 url = st.text_input("Enter YouTube URL:")
 if st.button("Generate MEM Summary"):
-    if url:
-        with st.spinner("Fetching transcript... please wait ⏳"):
-            video_id = extract_video_id(url)
-            if not video_id:
-                st.error("Invalid YouTube URL. Please check and try again.")
-            else:
                 try:
                     text = get_transcript(video_id)
-                    st.success("Transcript fetched successfully!")
-                    st.subheader(" Transcript (first 500 chars)")
-                    st.write(text[:500] + "...")
-                    with st.spinner("Creating your MEM-style summary... this may take a minute ⏳"):
                         summary = summarize_MEM_style(text)
-                    st.subheader("MEM-Style Explanation")
                     st.write(summary)
                 except Exception as e:
                     st.error(f"Error: {str(e)}")
-    else:
-        st.warning("Please paste a YouTube link first.")

+# YouTube Learning Assistant (Personalized MEM Style)
+# Stable Final Version – works on Hugging Face Spaces
 import streamlit as st
 import re
+from transformers import pipeline
+import requests
# Guarded import of the transcript library: report a missing dependency in
# the UI instead of crashing with a bare traceback.
try:
    from youtube_transcript_api import YouTubeTranscriptApi
except ImportError:
    st.error("youtube-transcript-api not found. Make sure it’s in requirements.txt")
    # Halt this script run here. Without the library, YouTubeTranscriptApi is
    # undefined and every later transcript request would fail with a
    # misleading NameError instead of this message.
    st.stop()
+# Helper Functions
# Recognised places a video ID can appear in a YouTube link:
#   ...watch?v=<id>   youtu.be/<id>   /embed/<id>   /shorts/<id>   /live/<id>   /v/<id>
# The negative lookahead rejects tokens longer than 11 characters instead of
# silently truncating them to their first 11 characters.
_VIDEO_ID_PATTERN = re.compile(
    r"(?:\bv=|youtu\.be/|/(?:embed|shorts|live|v)/)"
    r"([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])"
)


def extract_video_id(url: str):
    """Extract the 11-character YouTube video ID from a video URL.

    Supported shapes are ``watch?v=ID`` (with or without extra query
    parameters), ``youtu.be/ID``, ``/embed/ID``, ``/shorts/ID``, ``/live/ID``
    and ``/v/ID``. Surrounding whitespace is ignored.

    Args:
        url: The text pasted by the user.

    Returns:
        The video ID as a string, or ``None`` when ``url`` is empty or does
        not contain an ID in one of the supported positions (for example a
        channel or playlist link).
    """
    if not url:
        return None
    match = _VIDEO_ID_PATTERN.search(url.strip())
    return match.group(1) if match else None
def _video_exists(video_id: str):
    """Ask YouTube's oEmbed endpoint whether ``video_id`` resolves to a video.

    Returns ``True`` on HTTP 200, ``False`` on any other status, and ``None``
    when the request itself failed (timeout, DNS, connection error), meaning
    the question could not be answered.
    """
    try:
        response = requests.get(
            "https://www.youtube.com/oembed",
            # Passing the watch URL via ``params`` lets requests URL-encode it.
            params={"url": f"https://www.youtube.com/watch?v={video_id}"},
            timeout=10,  # never let a diagnostic call hang the app
        )
    except requests.RequestException:
        return None
    return response.status_code == 200


def get_transcript(video_id: str) -> str:
    """Return the English transcript of a YouTube video as a single string.

    The transcript is requested through ``youtube-transcript-api``. The
    instance API (``YouTubeTranscriptApi().fetch``) is used when the installed
    library provides it; otherwise the legacy static ``get_transcript`` call
    is used. Snippets may therefore be dicts with a ``"text"`` key or objects
    with a ``.text`` attribute; both are handled.

    When no usable text is obtained, YouTube's oEmbed endpoint is queried so
    the error can distinguish an existing video whose transcript could not be
    obtained from a wrong or unavailable video ID.

    Args:
        video_id: The 11-character YouTube video ID.

    Returns:
        All caption snippets joined with single spaces.

    Raises:
        Exception: If the transcript is missing, empty, or cannot be fetched.
            The message names the detected case and includes the underlying
            error text; the original error is attached as ``__cause__``.
    """
    failure = None
    text = ""
    try:
        if hasattr(YouTubeTranscriptApi, "fetch"):
            snippets = YouTubeTranscriptApi().fetch(video_id, languages=["en"])
        else:
            snippets = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])
        text = " ".join(
            part["text"] if isinstance(part, dict) else part.text
            for part in snippets
        )
    except Exception as exc:  # the library raises many unrelated error types
        failure = exc

    if text.strip():
        return text

    # Nothing usable came back: work out why, keeping the real cause visible.
    details = str(failure) if failure is not None else "Transcript empty."
    exists = _video_exists(video_id)
    if exists is None:
        message = (
            "Transcript could not be fetched and the video could not be "
            f"verified. Details: {details}"
        )
    elif exists:
        message = (
            "Transcript not available — this video likely has no English "
            f"subtitles. Details: {details}"
        )
    else:
        message = f"Invalid video ID or unavailable video. Details: {details}"
    raise Exception(message) from failure
@st.cache_resource(show_spinner=False)
def _load_summarizer():
    """Build the BART summarization pipeline once and reuse it across reruns."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


def summarize_MEM_style(text: str, max_chunk: int = 1000) -> str:
    """Summarize a transcript using MEM (Model Explanation Method) prompts.

    The text is cut into consecutive slices of ``max_chunk`` characters. Each
    slice is wrapped in the MEM instruction prompt and summarized on its own,
    and the partial summaries are joined with single spaces.

    Args:
        text: The full transcript text.
        max_chunk: Maximum number of characters per slice sent to the model.

    Returns:
        The combined summary, or ``""`` when ``text`` is empty or whitespace.

    Raises:
        ValueError: If ``max_chunk`` is not a positive integer.
    """
    if max_chunk <= 0:
        raise ValueError("max_chunk must be a positive integer")
    if not text or not text.strip():
        # Nothing to summarize; avoid loading the model for blank input.
        return ""

    summarizer = _load_summarizer()
    parts = []
    for start in range(0, len(text), max_chunk):
        chunk = text[start:start + max_chunk]
        # NOTE(review): the instructions below are passed to the model as part
        # of the input text; confirm that this summarization model actually
        # follows them rather than summarizing them along with the transcript.
        prompt = f"""
        Summarize and explain this content using the MEM (Model Explanation Method):
        - Use simple, story-like language.
        - Explain step-by-step, as if teaching a beginner.
        - Focus on understanding, not technical detail.
        - Keep tone calm, structured, and easy to remember.
        Text:
        {chunk}
        """
        result = summarizer(
            prompt,
            max_length=200,
            min_length=80,
            do_sample=False,
            truncation=True,  # guard against inputs longer than the model limit
        )
        parts.append(result[0]["summary_text"])
    return " ".join(parts).strip()
 # Streamlit Interface
+st.set_page_config(page_title="🎥  YouTube Learning Assistant (Personalized MEM Style)", layout="centered")
+st.title("🎓 YouTube Learning Assistant (Personalized MEM Style)")
+st.markdown("Paste a **YouTube video link** below to generate its transcript and a MEM-style explanation.")
 url = st.text_input("Enter YouTube URL:")
 if st.button("Generate MEM Summary"):
+    if not url:
+        st.warning("Please paste a YouTube link first.")
+    else:
+        video_id = extract_video_id(url)
+        if not video_id:
+            st.error("Invalid YouTube URL. Please check and try again.")
+        else:
+            with st.spinner("Fetching transcript… please wait ⏳"):
                 try:
                     text = get_transcript(video_id)
+                    st.success("Transcript fetched successfully ✅")
+                    st.subheader("📝 Transcript Preview")
+                    st.write(text[:600] + "…")
+                    with st.spinner("Creating your MEM-style summary… ⏳"):
                         summary = summarize_MEM_style(text)
+                    st.subheader("📘 MEM-Style Explanation")
                     st.write(summary)
                 except Exception as e:
                     st.error(f"Error: {str(e)}")