SafiUllahAdam committed on
Commit
3c6ab71
·
verified ·
1 Parent(s): 0aa1e76

Fixed import and stabilized YouTube transcript extraction

Browse files

Ensured proper import of YouTubeTranscriptApi and simplified transcript fetching.
Resolves 'name not defined' and transcript retrieval errors.

Files changed (1) hide show
  1. app.py +60 -50
app.py CHANGED
@@ -1,94 +1,104 @@
 
 
 
 
 
1
  import streamlit as st
2
- from youtube_transcript_api import YouTubeTranscriptApi as yta
3
- from transformers import pipeline
4
  import re
 
 
5
 
 
 
 
 
 
6
 
7
- # Helper functions
8
 
 
9
 
10
- def extract_video_id(url):
11
- """Extract YouTube video ID from a full URL."""
 
12
  pattern = r"(?:v=|\/)([0-9A-Za-z_-]{11}).*"
13
  match = re.search(pattern, url)
14
  return match.group(1) if match else None
15
 
16
 
17
- def get_transcript(video_id):
18
- """Fetch transcript text for a given video ID (compatible with all versions)."""
 
 
 
19
  try:
20
- # Standard way — works in most versions
21
  transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
 
 
 
 
22
  except Exception as e:
23
- # Fallback: try fetching manually from other available languages
24
- try:
25
- transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
26
- transcript = transcript_list.find_transcript(['en']).fetch()
27
- except Exception:
28
- raise Exception(f"Transcript not available or video has no subtitles. Details: {str(e)}")
29
-
30
- text = " ".join([t["text"] for t in transcript])
31
- return text
32
-
33
 
34
 
35
- def summarize_MEM_style(text):
36
- """
37
- Summarize transcript in MEM style:
38
- simple, story-like, structured, and step-by-step.
39
- """
40
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
41
 
42
- # Prepare chunks to fit within model limits (~1024 tokens)
43
- max_chunk = 1000
44
- text_chunks = [text[i:i+max_chunk] for i in range(0, len(text), max_chunk)]
45
-
46
  summary = ""
47
- for chunk in text_chunks:
48
- # Add MEM-style instruction to the input prompt
49
  prompt = f"""
50
- Summarize and explain this text using the MEM (Model Explanation Method):
51
  - Use simple, story-like language.
52
  - Explain step-by-step, as if teaching a beginner.
53
  - Focus on understanding, not technical detail.
54
- - Keep the explanation calm, structured, and easy to remember.
55
 
56
  Text:
57
  {chunk}
58
  """
59
- summary_part = summarizer(prompt, max_length=200, min_length=80, do_sample=False)[0]['summary_text']
60
- summary += summary_part + " "
61
 
62
  return summary.strip()
63
 
64
 
65
  # Streamlit Interface
66
 
67
- st.set_page_config(page_title="🎥 YouTube Learning Assistant (MEM Style)", layout="centered")
68
- st.title("🎓 YouTube Learning Assistant (MEM Style)")
69
- st.markdown("Paste a **YouTube video link** below to generate its transcript and MEM-style explanation.")
 
70
 
71
  url = st.text_input("Enter YouTube URL:")
72
 
73
  if st.button("Generate MEM Summary"):
74
- if url:
75
- with st.spinner("Fetching transcript... please wait "):
76
- video_id = extract_video_id(url)
77
- if not video_id:
78
- st.error("Invalid YouTube URL. Please check and try again.")
79
- else:
 
 
80
  try:
81
  text = get_transcript(video_id)
82
- st.success("Transcript fetched successfully!")
83
- st.subheader(" Transcript (first 500 chars)")
84
- st.write(text[:500] + "...")
85
-
86
- with st.spinner("Creating your MEM-style summary... this may take a minute ⏳"):
87
  summary = summarize_MEM_style(text)
88
- st.subheader("MEM-Style Explanation")
89
  st.write(summary)
90
 
91
  except Exception as e:
92
  st.error(f"Error: {str(e)}")
93
- else:
94
- st.warning("Please paste a YouTube link first.")
 
1
+
2
+ # YouTube Learning Assistant (Personalized MEM Style)
3
+ # Stable Final Version – works on Hugging Face Spaces
4
+
5
+
6
  import streamlit as st
 
 
7
  import re
8
+ from transformers import pipeline
9
+ import requests
10
 
11
# Guarded import of the transcript library.
# If the package is missing, show the message and halt this script run:
# continuing would leave `YouTubeTranscriptApi` undefined and the first
# transcript request would fail with a NameError instead of this message.
try:
    from youtube_transcript_api import YouTubeTranscriptApi
except ImportError:
    st.error("youtube-transcript-api not found. Make sure it’s in requirements.txt")
    st.stop()
16
 
 
17
 
18
+ # Helper Functions
19
 
20
+
21
def extract_video_id(url: str):
    """Extract the 11-character YouTube video ID from a URL.

    The ID is taken from the first place in ``url`` where ``v=`` or ``/`` is
    followed by exactly 11 ID characters (letters, digits, ``_`` or ``-``).
    A longer run of such characters is rejected rather than truncated, so a
    path segment such as a channel name is not mistaken for a video ID.

    Args:
        url: The text entered by the user; may be empty or ``None``.

    Returns:
        The 11-character video ID, or ``None`` when no ID can be found.
    """
    if not url:
        return None
    # The negative lookahead requires the 11-character run to end there;
    # without it the first 11 characters of any longer token would match.
    pattern = r"(?:v=|/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])"
    match = re.search(pattern, url)
    return match.group(1) if match else None
26
 
27
 
28
def get_transcript(video_id: str) -> str:
    """Fetch the English transcript text for a given YouTube video.

    The transcript entries are joined into one space-separated string.
    If fetching fails (or the transcript is empty), the YouTube oEmbed
    endpoint is queried to decide which error message to raise.

    Args:
        video_id: The YouTube video ID to fetch the transcript for.

    Returns:
        The transcript as a single string.

    Raises:
        Exception: If the transcript cannot be fetched or is empty. The
            message distinguishes "video exists but has no English
            transcript" from "video ID invalid / video unavailable". The
            original failure is attached as ``__cause__``.
    """
    try:
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Legacy static API: returns a list of dicts with a "text" key.
            entries = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
            text = " ".join(entry["text"] for entry in entries)
        else:
            # NOTE(review): newer youtube-transcript-api releases replace the
            # static helper with an instance `fetch()` whose snippets expose
            # `.text` — confirm against the version pinned in requirements.txt.
            fetched = YouTubeTranscriptApi().fetch(video_id, languages=['en'])
            text = " ".join(snippet.text for snippet in fetched)
        if not text.strip():
            raise Exception("Transcript empty.")
        return text
    except Exception as e:
        # Probe oEmbed to tell "no captions" apart from "no such video".
        # A timeout keeps a stalled connection from hanging the app.
        try:
            check = requests.get(
                "https://www.youtube.com/oembed",
                params={"url": f"https://www.youtube.com/watch?v={video_id}"},
                timeout=10,
            )
        except requests.RequestException:
            # The probe itself failed; report the original fetch error
            # instead of the unrelated network error.
            raise Exception(
                f"Transcript could not be fetched and the video could not be verified. Details: {str(e)}"
            ) from e
        if check.status_code == 200:
            raise Exception(
                "Transcript not available — this video likely has no English subtitles."
            ) from e
        raise Exception(f"Invalid video ID or unavailable video. Details: {str(e)}") from e
 
 
 
 
47
 
48
 
49
@st.cache_resource
def _load_summarizer():
    """Build the summarization pipeline once and reuse it across reruns."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


def summarize_MEM_style(text: str) -> str:
    """Summarize a transcript using the MEM (Model Explanation Method) prompt.

    The text is split into fixed-size character chunks; each chunk is wrapped
    in the MEM instruction prompt and summarized separately, and the partial
    summaries are joined with single spaces.

    Args:
        text: The full transcript text.

    Returns:
        The combined summary, or an empty string when ``text`` is empty or
        contains only whitespace.
    """
    if not text or not text.strip():
        # Nothing to summarize; avoid loading the model at all.
        return ""

    summarizer = _load_summarizer()

    max_chunk = 1000  # characters per chunk, to keep inputs small for the model
    chunks = [text[i:i + max_chunk] for i in range(0, len(text), max_chunk)]

    parts = []
    for chunk in chunks:
        if not chunk.strip():
            # A whitespace-only chunk carries no content to summarize.
            continue
        # NOTE(review): facebook/bart-large-cnn is a summarization model;
        # presumably it treats these instructions as ordinary input text
        # rather than following them — confirm the intended effect.
        prompt = f"""
        Summarize and explain this content using the MEM (Model Explanation Method):
        - Use simple, story-like language.
        - Explain step-by-step, as if teaching a beginner.
        - Focus on understanding, not technical detail.
        - Keep tone calm, structured, and easy to remember.

        Text:
        {chunk}
        """
        # truncation=True guards against inputs longer than the model accepts.
        out = summarizer(
            prompt, max_length=200, min_length=80, do_sample=False, truncation=True
        )[0]['summary_text']
        parts.append(out)

    return " ".join(parts).strip()
72
 
73
 
74
  # Streamlit Interface
75
 
76
+
77
# Page setup and static header.
st.set_page_config(page_title="🎥 YouTube Learning Assistant (Personalized MEM Style)", layout="centered")
st.title("🎓 YouTube Learning Assistant (Personalized MEM Style)")
st.markdown("Paste a **YouTube video link** below to generate its transcript and a MEM-style explanation.")

url = st.text_input("Enter YouTube URL:")

if st.button("Generate MEM Summary"):
    # Ignore stray whitespace from copy/paste before validating the link.
    cleaned_url = url.strip() if url else ""
    video_id = extract_video_id(cleaned_url) if cleaned_url else None

    if not cleaned_url:
        st.warning("Please paste a YouTube link first.")
    elif not video_id:
        st.error("Invalid YouTube URL. Please check and try again.")
    else:
        try:
            with st.spinner("Fetching transcript… please wait ⏳"):
                text = get_transcript(video_id)
            st.success("Transcript fetched successfully")
            st.subheader("📝 Transcript Preview")
            # Show the first 600 characters and mark the cut only when the
            # transcript is actually longer than the preview.
            preview = text[:600]
            if len(text) > 600:
                preview += "…"
            st.write(preview)

            with st.spinner("Creating your MEM-style summary ⏳"):
                summary = summarize_MEM_style(text)
            st.subheader("📘 MEM-Style Explanation")
            st.write(summary)

        except Exception as e:
            # Top-level UI boundary: surface any failure to the user.
            st.error(f"Error: {str(e)}")