MahatirTusher committed on
Commit
5265a5a
·
verified ·
1 Parent(s): 192f007

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -1
app.py CHANGED
@@ -13,12 +13,16 @@ from bs4 import SoupStrainer
13
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
14
  import yt_dlp
15
  import re
 
 
16
 
17
  # Load environment variables (optional)
18
  load_dotenv()
19
 
# Groq API key, read from the environment so the secret is not committed to source.
# SECURITY: a literal key was previously hardcoded on this line; treat it as leaked,
# revoke it, and provide GROQ_API_KEY via a .env file (see load_dotenv() above) or
# the hosting platform's secret store.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 
 
22
 
23
  # Custom CSS
24
  st.markdown("""
@@ -123,7 +127,7 @@ if "llm" not in st.session_state:
123
  st.session_state.llm = ChatGroq(
124
  api_key=GROQ_API_KEY,
125
  model="llama3-70b-8192",
126
- max_tokens=512 # Further reduced to minimize resource usage
127
  )
128
 
129
  # Sidebar for URL and YouTube input
@@ -203,6 +207,45 @@ def fetch_youtube_transcript(video_id):
203
  st.error(f"Error fetching transcript with youtube-transcript-api: {str(e)}")
204
  return None
205
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  # Function to extract subtitles using yt-dlp with cookies
207
  def extract_subtitles_with_ytdlp(video_url):
208
  ydl_opts = {
@@ -360,6 +403,10 @@ if process_youtube_clicked:
360
  st.text("Fetching Closed Captions...Started...✅✅✅")
361
  transcript_text = extract_subtitles_with_ytdlp(youtube_url)
362
 
 
 
 
 
363
  if not transcript_text:
364
  st.error(
365
  "No transcripts or closed captions available. "
@@ -369,6 +416,7 @@ if process_youtube_clicked:
369
  "Solutions:\n"
370
  "- Ensure captions are enabled for the video by checking the video settings on YouTube (gear icon > Subtitles/CC > Enable if available).\n"
371
  "- Regenerate and upload a fresh cookies.txt file (see instructions above).\n"
 
372
  "- Try a different video (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ, which has transcripts available).\n"
373
  "- Test locally to rule out Hugging Face Spaces IP restrictions by running: pip install -r requirements.txt && streamlit run app.py"
374
  )
 
13
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
14
  import yt_dlp
15
  import re
16
+ from googleapiclient.discovery import build
17
+ from googleapiclient.errors import HttpError
18
 
19
  # Load environment variables (optional)
20
  load_dotenv()
21
 
# Groq API key, read from the environment so the secret is not committed to source.
# SECURITY: a literal key was previously hardcoded on this line; treat it as leaked,
# revoke it, and provide GROQ_API_KEY via a .env file (see load_dotenv() above) or
# Hugging Face Spaces secrets.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# YouTube API key (to be set in Hugging Face Spaces secrets)
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
26
 
27
  # Custom CSS
28
  st.markdown("""
 
127
  st.session_state.llm = ChatGroq(
128
  api_key=GROQ_API_KEY,
129
  model="llama3-70b-8192",
130
+ max_tokens=512 # Keep reduced to minimize resource usage
131
  )
132
 
133
  # Sidebar for URL and YouTube input
 
207
  st.error(f"Error fetching transcript with youtube-transcript-api: {str(e)}")
208
  return None
209
 
210
+ # Function to fetch captions using YouTube Data API (limited to listing with API key)
211
def fetch_youtube_captions_api(video_id, api_key):
    """Check via the YouTube Data API whether a video has an English caption track.

    This only *lists* caption tracks; it never downloads caption text. Every
    path returns ``None`` and reports its outcome through a Streamlit
    warning/error message, so callers always fall through to their own
    "no transcript" handling.

    Args:
        video_id: YouTube video ID passed as ``videoId`` to ``captions().list``.
        api_key: YouTube Data API developer key. When falsy, the lookup is
            skipped with a warning.

    Returns:
        Always ``None``.
    """
    if not api_key:
        st.warning("YOUTUBE_API_KEY not set. Skipping YouTube Data API fallback.")
        return None

    # Keep the try body limited to the calls that actually talk to the API.
    # NOTE(review): confirm that captions.list accepts a bare API key; the
    # public docs list OAuth scopes for this endpoint.
    try:
        youtube = build('youtube', 'v3', developerKey=api_key)
        captions = youtube.captions().list(
            part='snippet',
            videoId=video_id,
        ).execute()
    except HttpError as e:
        st.error(f"Error fetching captions with YouTube Data API: {str(e)}")
        return None

    # Language tags treated as English; the first matching track wins.
    english_tags = ('en', 'en-US', 'en-GB')
    caption_id = None
    for item in captions.get('items', []):
        # .get() so an item without 'snippet'/'language'/'id' is skipped
        # instead of raising an uncaught KeyError.
        language = item.get('snippet', {}).get('language')
        if language in english_tags and item.get('id'):
            caption_id = item['id']
            break

    if not caption_id:
        st.warning("No English captions found via YouTube Data API.")
        return None

    # Note: Downloading captions requires OAuth 2.0 authentication, so the
    # track found above can only be reported, not fetched.
    st.warning(
        "English captions are available for this video but cannot be fetched with an API key alone. "
        "Downloading captions requires OAuth 2.0 authentication, which is not supported in Hugging Face Spaces without user interaction. "
        "To fetch captions:\n"
        "- Test locally with OAuth 2.0 setup (see https://developers.google.com/youtube/v3/guides/auth/installed-apps for instructions).\n"
        "- Or try a video with transcripts available (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ)."
    )
    return None
248
+
249
  # Function to extract subtitles using yt-dlp with cookies
250
  def extract_subtitles_with_ytdlp(video_url):
251
  ydl_opts = {
 
403
  st.text("Fetching Closed Captions...Started...✅✅✅")
404
  transcript_text = extract_subtitles_with_ytdlp(youtube_url)
405
 
406
+ if not transcript_text and YOUTUBE_API_KEY:
407
+ st.text("Fetching Captions via YouTube Data API...Started...✅✅✅")
408
+ transcript_text = fetch_youtube_captions_api(video_id, YOUTUBE_API_KEY)
409
+
410
  if not transcript_text:
411
  st.error(
412
  "No transcripts or closed captions available. "
 
416
  "Solutions:\n"
417
  "- Ensure captions are enabled for the video by checking the video settings on YouTube (gear icon > Subtitles/CC > Enable if available).\n"
418
  "- Regenerate and upload a fresh cookies.txt file (see instructions above).\n"
419
+ "- Ensure YOUTUBE_API_KEY is set in Spaces secrets (Settings > Secrets > Add YOUTUBE_API_KEY).\n"
420
  "- Try a different video (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ, which has transcripts available).\n"
421
  "- Test locally to rule out Hugging Face Spaces IP restrictions by running: pip install -r requirements.txt && streamlit run app.py"
422
  )