# YouTube Learning Assistant (Personalized MEM Style)
# Stable Final Version – works on Hugging Face Spaces


import streamlit as st
import re
from transformers import pipeline
import requests

# Safe import of transcript library 
try:
    from youtube_transcript_api import YouTubeTranscriptApi
except ImportError:
    st.error("youtube-transcript-api not found. Make sure it’s in requirements.txt")


# Helper Functions


# Recognised URL shapes: watch?v=<id> (with v anywhere in the query string),
# /embed/<id>, /shorts/<id>, /live/<id>, /v/<id> and youtu.be/<id>.
# The trailing negative lookahead rejects IDs longer than 11 characters
# instead of silently truncating them.
_VIDEO_ID_RE = re.compile(
    r"(?<![0-9A-Za-z-])"
    r"(?:youtube(?:-nocookie)?\.com/(?:watch\?(?:[^#\s]*?&)?v=|(?:embed|shorts|live|v)/)"
    r"|youtu\.be/)"
    r"([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])"
)


def extract_video_id(url: str) -> "str | None":
    """Extract the 11-character video ID from a YouTube URL.

    Only URLs that look like YouTube video links are accepted (watch, embed,
    shorts, live, /v/ and youtu.be short links, with or without a scheme).
    Other URLs, such as channel pages or links to other sites, are rejected
    rather than having the first 11 characters of a path segment mistaken
    for a video ID.

    Args:
        url: The text entered by the user.

    Returns:
        The video ID, or None when no YouTube video ID can be found.
    """
    if not url:
        return None
    match = _VIDEO_ID_RE.search(url)
    return match.group(1) if match else None


def get_transcript(video_id: str) -> str:
    """Fetch the English transcript text for a YouTube video.

    The transcript segments are joined into one space-separated string.
    If fetching fails (or the transcript is empty), the YouTube oEmbed
    endpoint is queried to tell "video exists but has no English
    transcript" apart from "video ID is invalid or unavailable".

    Args:
        video_id: The 11-character YouTube video ID.

    Returns:
        The transcript as a single non-empty string.

    Raises:
        RuntimeError: If the transcript library is missing, the transcript
            cannot be fetched, or YouTube cannot be reached. The original
            failure is chained as ``__cause__``.
    """
    # The module-level import is guarded; if it failed, the name does not
    # exist. Report that directly instead of blaming missing subtitles.
    try:
        api = YouTubeTranscriptApi
    except NameError:
        raise RuntimeError(
            "youtube-transcript-api is not installed. Make sure it is in requirements.txt"
        ) from None

    try:
        if hasattr(api, "get_transcript"):
            # Older releases: static method returning a list of dicts.
            entries = api.get_transcript(video_id, languages=["en"])
            pieces = [entry["text"] for entry in entries]
        else:
            # Newer releases dropped the static helper in favour of an
            # instance-level fetch() that yields snippet objects.
            pieces = [snippet.text for snippet in api().fetch(video_id, languages=["en"])]

        text = " ".join(pieces)
        if not text.strip():
            raise ValueError("Transcript empty.")
        return text
    except Exception as fetch_error:
        # Graceful fallback: check whether the video exists at all.
        try:
            check = requests.get(
                "https://www.youtube.com/oembed",
                params={"url": f"https://www.youtube.com/watch?v={video_id}"},
                timeout=10,  # never let the UI hang on a stalled connection
            )
        except requests.RequestException as probe_error:
            raise RuntimeError(
                f"Could not fetch the transcript or reach YouTube. Details: {fetch_error}"
            ) from probe_error

        if check.status_code == 200:
            raise RuntimeError(
                "Transcript not available — this video likely has no English subtitles."
            ) from fetch_error
        raise RuntimeError(
            f"Invalid video ID or unavailable video. Details: {fetch_error}"
        ) from fetch_error


def summarize_MEM_style(text: str) -> str:
    """Summarize a transcript chunk by chunk using MEM (Model Explanation Method).

    The text is split on whitespace into chunks of at most 1000 characters,
    so words are not cut in half at chunk boundaries. Each chunk is wrapped
    in the MEM prompt and summarized; the per-chunk summaries are joined
    with single spaces.

    Args:
        text: The full transcript text.

    Returns:
        The combined summary, or an empty string when ``text`` is empty or
        contains only whitespace (the model is not loaded in that case).
    """
    import textwrap  # local import keeps this block self-contained

    max_chunk = 1000  # keep inside model token limit
    # wrap() breaks on whitespace and only hard-splits a "word" that is
    # itself longer than max_chunk; blank input yields no chunks.
    chunks = textwrap.wrap(text, width=max_chunk, break_on_hyphens=False)
    if not chunks:
        return ""

    # NOTE(review): the pipeline is rebuilt on every call; consider caching it
    # across Streamlit reruns (e.g. st.cache_resource) if the installed
    # Streamlit version supports it.
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

    summaries = []
    for chunk in chunks:
        # NOTE(review): bart-large-cnn is a plain summarization model, not an
        # instruction-following one, so these instructions are summarized
        # along with the chunk and may leak into the output. Kept as-is
        # pending a product decision.
        prompt = f"""
        Summarize and explain this content using the MEM (Model Explanation Method):
        - Use simple, story-like language.
        - Explain step-by-step, as if teaching a beginner.
        - Focus on understanding, not technical detail.
        - Keep tone calm, structured, and easy to remember.

        Text:
        {chunk}
        """
        result = summarizer(
            prompt,
            max_length=200,
            min_length=80,
            do_sample=False,
            truncation=True,  # guard against inputs exceeding the model limit
        )
        summaries.append(result[0]["summary_text"])

    return " ".join(summaries).strip()


# Streamlit Interface


# Page chrome. set_page_config is kept as the first Streamlit call in this section.
st.set_page_config(page_title="🎥  YouTube Learning Assistant (Personalized MEM Style)", layout="centered")
st.title("🎓 YouTube Learning Assistant (Personalized MEM Style)")
st.markdown("Paste a **YouTube video link** below to generate its transcript and a MEM-style explanation.")

# Strip surrounding whitespace so a blank-but-not-empty paste is treated as
# missing input rather than reported as an invalid URL.
url = st.text_input("Enter YouTube URL:").strip()

PREVIEW_CHARS = 600  # how much of the transcript to show before summarizing

if st.button("Generate MEM Summary"):
    video_id = extract_video_id(url) if url else None

    if not url:
        st.warning("Please paste a YouTube link first.")
    elif not video_id:
        st.error("Invalid YouTube URL. Please check and try again.")
    else:
        # Top-level UI boundary: any failure is shown to the user instead of
        # crashing the app with a traceback.
        try:
            # The spinner covers only the fetch, so it disappears once the
            # transcript has arrived.
            with st.spinner("Fetching transcript… please wait ⏳"):
                text = get_transcript(video_id)
            st.success("Transcript fetched successfully ✅")

            st.subheader("📝 Transcript Preview")
            # Add the ellipsis only when the transcript was actually cut.
            preview = text[:PREVIEW_CHARS]
            if len(text) > PREVIEW_CHARS:
                preview += "…"
            st.write(preview)

            with st.spinner("Creating your MEM-style summary… ⏳"):
                summary = summarize_MEM_style(text)
            st.subheader("📘 MEM-Style Explanation")
            st.write(summary)
        except Exception as e:
            st.error(f"Error: {str(e)}")