# YouTube Learning Assistant (Personalized MEM Style) # Stable Final Version – works on Hugging Face Spaces import streamlit as st import re from transformers import pipeline import requests # Safe import of transcript library try: from youtube_transcript_api import YouTubeTranscriptApi except ImportError: st.error("youtube-transcript-api not found. Make sure it’s in requirements.txt") # Helper Functions def extract_video_id(url: str): """Extract the 11-character YouTube video ID from any valid URL.""" pattern = r"(?:v=|\/)([0-9A-Za-z_-]{11}).*" match = re.search(pattern, url) return match.group(1) if match else None def get_transcript(video_id: str) -> str: """ Fetch the English transcript text for a given YouTube video. Falls back to YouTube oEmbed check if unavailable. """ try: # Standard transcript fetch transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en']) text = " ".join([t["text"] for t in transcript]) if not text.strip(): raise Exception("Transcript empty.") return text except Exception as e: # Graceful fallback: check if video exists / has captions check = requests.get(f"https://www.youtube.com/oembed?url=https://www.youtube.com/watch?v={video_id}") if check.status_code == 200: raise Exception("Transcript not available — this video likely has no English subtitles.") else: raise Exception(f"Invalid video ID or unavailable video. Details: {str(e)}") def summarize_MEM_style(text: str) -> str: """Summarize transcript using MEM (Model Explanation Method).""" summarizer = pipeline("summarization", model="facebook/bart-large-cnn") max_chunk = 1000 # keep inside model token limit chunks = [text[i:i + max_chunk] for i in range(0, len(text), max_chunk)] summary = "" for chunk in chunks: prompt = f""" Summarize and explain this content using the MEM (Model Explanation Method): - Use simple, story-like language. - Explain step-by-step, as if teaching a beginner. - Focus on understanding, not technical detail. - Keep tone calm, structured, and easy to remember. Text: {chunk} """ out = summarizer(prompt, max_length=200, min_length=80, do_sample=False)[0]['summary_text'] summary += out + " " return summary.strip() # Streamlit Interface st.set_page_config(page_title="🎥 YouTube Learning Assistant (Personalized MEM Style)", layout="centered") st.title("🎓 YouTube Learning Assistant (Personalized MEM Style)") st.markdown("Paste a **YouTube video link** below to generate its transcript and a MEM-style explanation.") url = st.text_input("Enter YouTube URL:") if st.button("Generate MEM Summary"): if not url: st.warning("Please paste a YouTube link first.") else: video_id = extract_video_id(url) if not video_id: st.error("Invalid YouTube URL. Please check and try again.") else: with st.spinner("Fetching transcript… please wait ⏳"): try: text = get_transcript(video_id) st.success("Transcript fetched successfully ✅") st.subheader("📝 Transcript Preview") st.write(text[:600] + "…") with st.spinner("Creating your MEM-style summary… ⏳"): summary = summarize_MEM_style(text) st.subheader("📘 MEM-Style Explanation") st.write(summary) except Exception as e: st.error(f"Error: {str(e)}")