File size: 3,749 Bytes
3c6ab71
 
 
 
 
f21c398
 
3c6ab71
 
f21c398
3c6ab71
 
 
 
 
f21c398
 
3c6ab71
f21c398
3c6ab71
 
 
f21c398
 
 
 
 
3c6ab71
 
 
 
 
0aa1e76
3c6ab71
0aa1e76
3c6ab71
 
 
 
0aa1e76
3c6ab71
 
 
 
 
 
f21c398
0aa1e76
3c6ab71
 
f21c398
 
3c6ab71
 
f21c398
3c6ab71
 
f21c398
3c6ab71
f21c398
 
 
3c6ab71
f21c398
 
 
 
3c6ab71
 
f21c398
 
 
 
 
 
3c6ab71
 
 
 
f21c398
 
 
 
3c6ab71
 
 
 
 
 
 
 
f21c398
 
3c6ab71
 
 
 
 
f21c398
3c6ab71
f21c398
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105

# YouTube Learning Assistant (Personalized MEM Style)
# Stable Final Version – works on Hugging Face Spaces


import streamlit as st
import re
from transformers import pipeline
import requests

# Guarded import of the transcript library.
# If the package is missing, an error is rendered in the Streamlit page instead of
# letting the ImportError propagate. The name `YouTubeTranscriptApi` is NOT bound in
# that case, so any later use of it raises NameError.
# NOTE(review): on failure this st.error call runs before st.set_page_config further
# down the file — confirm the installed Streamlit version tolerates that ordering.
try:
    from youtube_transcript_api import YouTubeTranscriptApi
except ImportError:
    st.error("youtube-transcript-api not found. Make sure it’s in requirements.txt")


# Helper Functions


def extract_video_id(url: str):
    """Extract the 11-character YouTube video ID from a YouTube URL.

    Recognised URL shapes:
      - ``...watch?v=<id>`` (``v`` may appear anywhere in the query string)
      - ``youtu.be/<id>``
      - ``.../embed/<id>``, ``.../shorts/<id>``, ``.../live/<id>``, ``.../v/<id>``

    Args:
        url: The URL (or URL fragment) pasted by the user.

    Returns:
        The 11-character video ID, or ``None`` when the input is empty or
        does not contain a video ID in one of the recognised positions.
    """
    if not url:
        return None

    pattern = (
        # Anchor on the places a video ID can legitimately appear, rather than
        # after any "/" (which also matched channel IDs and arbitrary paths).
        r"(?:[?&]v=|youtu\.be/|/(?:embed|shorts|live|v)/)"
        r"([0-9A-Za-z_-]{11})"
        # The ID must end here: a longer token is not a video ID.
        r"(?![0-9A-Za-z_-])"
    )
    match = re.search(pattern, url)
    return match.group(1) if match else None


def get_transcript(video_id: str) -> str:
    """
    Fetch the English transcript text for a given YouTube video.

    The classic static ``YouTubeTranscriptApi.get_transcript`` call is used
    when the installed library provides it; otherwise the instance-based
    ``YouTubeTranscriptApi().fetch`` call is used.

    If the transcript cannot be fetched (or is empty), the YouTube oEmbed
    endpoint is queried to distinguish "video exists but has no English
    captions" from "video ID is invalid or the video is unavailable".

    Args:
        video_id: The 11-character YouTube video ID.

    Returns:
        All transcript snippets joined into one space-separated string.

    Raises:
        Exception: With a user-facing message when the transcript is missing,
            the video is invalid/unavailable, or the oEmbed check itself could
            not be performed. The underlying error is chained as the cause.
    """
    try:
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Legacy static API: returns a list of dicts with a "text" key.
            entries = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])
            pieces = [entry["text"] for entry in entries]
        else:
            # Newer instance API: iterating the result yields snippet objects.
            fetched = YouTubeTranscriptApi().fetch(video_id, languages=["en"])
            pieces = [snippet.text for snippet in fetched]

        text = " ".join(pieces)
        if not text.strip():
            # Deliberately routed through the fallback below, like any other failure.
            raise Exception("Transcript empty.")
        return text
    except Exception as e:
        # Graceful fallback: check whether the video exists at all.
        try:
            check = requests.get(
                "https://www.youtube.com/oembed",
                # Let requests percent-encode the nested watch URL.
                params={"url": f"https://www.youtube.com/watch?v={video_id}"},
                # Without a timeout a stalled connection would hang the app.
                timeout=10,
            )
        except requests.RequestException as net_err:
            raise Exception(
                f"Could not fetch the transcript or verify the video (network error). Details: {str(e)}"
            ) from net_err

        if check.status_code == 200:
            raise Exception("Transcript not available — this video likely has no English subtitles.") from e
        raise Exception(f"Invalid video ID or unavailable video. Details: {str(e)}") from e


def _load_mem_summarizer():
    """Build the BART summarization pipeline used for MEM summaries."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


def summarize_MEM_style(text: str) -> str:
    """Summarize transcript using MEM (Model Explanation Method).

    The transcript is split into chunks of at most 1000 characters on word
    boundaries; each chunk is wrapped in the MEM instruction prompt and
    summarized, and the per-chunk summaries are joined with single spaces.

    Args:
        text: The full transcript text.

    Returns:
        The combined summary, or an empty string when ``text`` is empty or
        contains only whitespace (the model is not loaded in that case).
    """
    import textwrap

    max_chunk = 1000  # keep inside model token limit
    # Split on whitespace so words are not cut in half at chunk borders;
    # a single token longer than max_chunk is still hard-split.
    chunks = textwrap.wrap(
        text or "",
        width=max_chunk,
        break_long_words=True,
        break_on_hyphens=False,
    )
    if not chunks:
        return ""

    # Reuse the loaded model across Streamlit reruns when the installed
    # Streamlit offers st.cache_resource; otherwise load it directly.
    cache_resource = getattr(st, "cache_resource", None)
    load_summarizer = cache_resource(_load_mem_summarizer) if cache_resource else _load_mem_summarizer
    summarizer = load_summarizer()

    parts = []
    for chunk in chunks:
        # NOTE(review): the model is a plain summarizer, so these instructions are
        # presumably summarized along with the chunk rather than followed — confirm.
        prompt = f"""
        Summarize and explain this content using the MEM (Model Explanation Method):
        - Use simple, story-like language.
        - Explain step-by-step, as if teaching a beginner.
        - Focus on understanding, not technical detail.
        - Keep tone calm, structured, and easy to remember.

        Text:
        {chunk}
        """
        out = summarizer(prompt, max_length=200, min_length=80, do_sample=False)[0]['summary_text']
        parts.append(out)

    return " ".join(parts).strip()


# Streamlit Interface


# Page setup and static header text.
st.set_page_config(
    page_title="🎥  YouTube Learning Assistant (Personalized MEM Style)",
    layout="centered",
)
st.title("🎓 YouTube Learning Assistant (Personalized MEM Style)")
st.markdown("Paste a **YouTube video link** below to generate its transcript and a MEM-style explanation.")

# Input widgets: the URL box and the button that triggers processing.
url = st.text_input("Enter YouTube URL:")
generate_clicked = st.button("Generate MEM Summary")

if generate_clicked:
    # Only try to parse an ID when something was actually entered.
    video_id = extract_video_id(url) if url else None

    if not url:
        st.warning("Please paste a YouTube link first.")
    elif not video_id:
        st.error("Invalid YouTube URL. Please check and try again.")
    else:
        with st.spinner("Fetching transcript… please wait ⏳"):
            try:
                transcript_text = get_transcript(video_id)
                st.success("Transcript fetched successfully ✅")
                st.subheader("📝 Transcript Preview")
                # Show the first 600 characters followed by an ellipsis.
                preview = transcript_text[:600] + "…"
                st.write(preview)

                with st.spinner("Creating your MEM-style summary… ⏳"):
                    mem_summary = summarize_MEM_style(transcript_text)
                st.subheader("📘 MEM-Style Explanation")
                st.write(mem_summary)

            except Exception as err:
                # Any failure from fetching or summarizing is shown in the page.
                st.error(f"Error: {str(err)}")