File size: 3,749 Bytes
3c6ab71 f21c398 3c6ab71 f21c398 3c6ab71 f21c398 3c6ab71 f21c398 3c6ab71 f21c398 3c6ab71 0aa1e76 3c6ab71 0aa1e76 3c6ab71 0aa1e76 3c6ab71 f21c398 0aa1e76 3c6ab71 f21c398 3c6ab71 f21c398 3c6ab71 f21c398 3c6ab71 f21c398 3c6ab71 f21c398 3c6ab71 f21c398 3c6ab71 f21c398 3c6ab71 f21c398 3c6ab71 f21c398 3c6ab71 f21c398 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
# YouTube Learning Assistant (Personalized MEM Style)
# Stable Final Version – works on Hugging Face Spaces
import streamlit as st
import re
from transformers import pipeline
import requests
# Safe import of transcript library
try:
    from youtube_transcript_api import YouTubeTranscriptApi
except ImportError:
    # Without the transcript library nothing below can work, so report the
    # problem and halt this script run instead of continuing into a NameError
    # the first time get_transcript() is called.
    st.error("youtube-transcript-api not found. Make sure it’s in requirements.txt")
    st.stop()
# Helper Functions
def extract_video_id(url: str):
    """Extract the 11-character YouTube video ID from a URL.

    The ID is recognised when it follows either ``v=`` (watch URLs) or a
    ``/`` (youtu.be, /embed/, /shorts/ style URLs) and is exactly 11
    characters drawn from ``[0-9A-Za-z_-]``.

    Args:
        url: The text pasted by the user.

    Returns:
        The 11-character video ID, or ``None`` when no ID can be found
        (including when ``url`` is empty or not a string).
    """
    if not isinstance(url, str):
        return None
    # The negative lookahead requires the ID to END after 11 characters, so
    # longer path segments such as "/feed/subscriptions" or a channel ID are
    # not mistaken for a video ID by taking their first 11 characters.
    pattern = r"(?:v=|/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])"
    match = re.search(pattern, url)
    return match.group(1) if match else None
def get_transcript(video_id: str) -> str:
    """Fetch the English transcript text for a given YouTube video.

    The transcript segments are joined with single spaces. If the transcript
    cannot be fetched (or is empty), the YouTube oEmbed endpoint is queried
    to tell "video exists but has no usable transcript" apart from
    "video is invalid or unavailable".

    Args:
        video_id: The YouTube video ID.

    Returns:
        The transcript as one string.

    Raises:
        Exception: With a user-facing message when no transcript can be
            returned. The original failure is attached as ``__cause__``.
    """
    try:
        # NOTE(review): newer youtube-transcript-api releases may have replaced
        # this static call with an instance-based fetch API — confirm against
        # the version pinned in requirements.txt.
        segments = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
        text = " ".join(segment["text"] for segment in segments)
        if not text.strip():
            raise Exception("Transcript empty.")
        return text
    except Exception as e:
        # Probe oEmbed to find out whether the video itself exists. The watch
        # URL is passed via `params` so it is percent-encoded correctly, and a
        # timeout keeps the app from hanging on a stalled connection.
        try:
            check = requests.get(
                "https://www.youtube.com/oembed",
                params={"url": f"https://www.youtube.com/watch?v={video_id}"},
                timeout=10,
            )
        except requests.RequestException as probe_error:
            raise Exception(
                "Transcript could not be fetched and the video check failed. "
                f"Details: {str(e)} / {str(probe_error)}"
            ) from e
        if check.status_code == 200:
            raise Exception(
                "Transcript not available — this video likely has no English subtitles."
            ) from e
        raise Exception(
            f"Invalid video ID or unavailable video. Details: {str(e)}"
        ) from e
@st.cache_resource(show_spinner=False)
def _load_summarizer():
    """Build the summarization pipeline once and reuse it across script reruns."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


def summarize_MEM_style(text: str, max_chunk: int = 1000) -> str:
    """Summarize transcript using MEM (Model Explanation Method).

    The text is split into chunks of at most ``max_chunk`` characters, each
    chunk is summarized separately, and the partial summaries are joined
    with single spaces.

    Args:
        text: The transcript to summarize.
        max_chunk: Maximum number of characters per chunk sent to the model.

    Returns:
        The combined summary, or an empty string when ``text`` is empty or
        contains only whitespace.
    """
    import textwrap

    # Split on whitespace so words are not cut in half at chunk boundaries;
    # words longer than max_chunk are still broken to respect the budget.
    chunks = textwrap.wrap(text or "", width=max_chunk)
    if not chunks:
        # Nothing to summarize — avoid loading the model at all.
        return ""

    summarizer = _load_summarizer()
    pieces = []
    for chunk in chunks:
        # NOTE(review): the instruction text below is fed to the model as part
        # of the input; whether this summarization model actually follows
        # instructions (rather than summarizing them) should be confirmed.
        prompt = (
            "\n"
            "Summarize and explain this content using the MEM (Model Explanation Method):\n"
            "- Use simple, story-like language.\n"
            "- Explain step-by-step, as if teaching a beginner.\n"
            "- Focus on understanding, not technical detail.\n"
            "- Keep tone calm, structured, and easy to remember.\n"
            "Text:\n"
            f"{chunk}\n"
        )
        # truncation=True guards against inputs whose token count exceeds the
        # model's limit even though the character budget was respected.
        result = summarizer(
            prompt,
            max_length=200,
            min_length=80,
            do_sample=False,
            truncation=True,
        )
        pieces.append(result[0]['summary_text'])
    return " ".join(pieces).strip()
# Streamlit Interface
st.set_page_config(page_title="🎥 YouTube Learning Assistant (Personalized MEM Style)", layout="centered")
st.title("🎓 YouTube Learning Assistant (Personalized MEM Style)")
st.markdown("Paste a **YouTube video link** below to generate its transcript and a MEM-style explanation.")
url = st.text_input("Enter YouTube URL:")

if st.button("Generate MEM Summary"):
    # Pasted links often carry stray whitespace; ignore it.
    cleaned_url = url.strip()
    video_id = extract_video_id(cleaned_url) if cleaned_url else None

    if not cleaned_url:
        st.warning("Please paste a YouTube link first.")
    elif not video_id:
        st.error("Invalid YouTube URL. Please check and try again.")
    else:
        try:
            # The two spinners run one after the other so the "fetching"
            # spinner is gone before the summary step starts.
            with st.spinner("Fetching transcript… please wait ⏳"):
                text = get_transcript(video_id)
            st.success("Transcript fetched successfully ✅")

            st.subheader("📝 Transcript Preview")
            # Only show the ellipsis when the preview is actually truncated.
            preview = text[:600] + ("…" if len(text) > 600 else "")
            st.write(preview)

            with st.spinner("Creating your MEM-style summary… ⏳"):
                summary = summarize_MEM_style(text)
            st.subheader("📘 MEM-Style Explanation")
            st.write(summary)
        except Exception as e:
            # Top-level UI boundary: surface any failure to the user.
            st.error(f"Error: {str(e)}")
|