MahatirTusher committed on
Commit
c8f736d
·
verified ·
1 Parent(s): 1c8b05d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -9
app.py CHANGED
@@ -6,6 +6,7 @@ from langchain_community.vectorstores.faiss import FAISS
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  from langchain_core.documents import Document
8
  import os
 
9
  from langchain_groq import ChatGroq
10
  from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
11
  from langchain.prompts import PromptTemplate
@@ -15,15 +16,22 @@ import yt_dlp
15
  import re
16
  from googleapiclient.discovery import build
17
  from googleapiclient.errors import HttpError
 
 
 
18
 
19
  # Load environment variables (optional)
20
  load_dotenv()
21
 
22
  # Hardcoded Groq API key
23
  GROQ_API_KEY = "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"
24
- # YouTube API key (to be set in Hugging Face Spaces secrets)
25
  YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
26
 
 
 
 
 
27
  # Custom CSS
28
  st.markdown("""
29
  <style>
@@ -207,10 +215,89 @@ def fetch_youtube_transcript(video_id):
207
  st.error(f"Error fetching transcript with youtube-transcript-api: {str(e)}")
208
  return None
209
 
210
- # Function to fetch captions using YouTube Data API (limited to listing with API key)
211
- def fetch_youtube_captions_api(video_id, api_key):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  if not api_key:
213
- st.warning("YOUTUBE_API_KEY not set. Skipping YouTube Data API fallback.")
214
  return None
215
  try:
216
  youtube = build('youtube', 'v3', developerKey=api_key)
@@ -237,13 +324,13 @@ def fetch_youtube_captions_api(video_id, api_key):
237
  "English captions are available for this video but cannot be fetched with an API key alone. "
238
  "Downloading captions requires OAuth 2.0 authentication, which is not supported in Hugging Face Spaces without user interaction. "
239
  "To fetch captions:\n"
240
- "- Test locally with OAuth 2.0 setup (see https://developers.google.com/youtube/v3/guides/auth/installed-apps for instructions).\n"
241
  "- Or try a video with transcripts available (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ)."
242
  )
243
  return None
244
 
245
  except HttpError as e:
246
- st.error(f"Error fetching captions with YouTube Data API: {str(e)}")
247
  return None
248
 
249
  # Function to extract subtitles using yt-dlp with cookies
@@ -400,10 +487,10 @@ if process_youtube_clicked:
400
 
401
  if not transcript_text:
402
  st.warning("Transcripts are disabled or unavailable. Attempting to fetch closed captions...")
403
- st.text("Fetching Closed Captions...Started...βœ…βœ…βœ…")
404
  transcript_text = extract_subtitles_with_ytdlp(youtube_url)
405
 
406
- if not transcript_text and YOUTUBE_API_KEY:
407
  st.text("Fetching Captions via YouTube Data API...Started...βœ…βœ…βœ…")
408
  transcript_text = fetch_youtube_captions_api(video_id, YOUTUBE_API_KEY)
409
 
@@ -416,7 +503,7 @@ if process_youtube_clicked:
416
  "Solutions:\n"
417
  "- Ensure captions are enabled for the video by checking the video settings on YouTube (gear icon > Subtitles/CC > Enable if available).\n"
418
  "- Regenerate and upload a fresh cookies.txt file (see instructions above).\n"
419
- "- Ensure YOUTUBE_API_KEY is set in Spaces secrets (Settings > Secrets > Add YOUTUBE_API_KEY).\n"
420
  "- Try a different video (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ, which has transcripts available).\n"
421
  "- Test locally to rule out Hugging Face Spaces IP restrictions by running: pip install -r requirements.txt && streamlit run app.py"
422
  )
 
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  from langchain_core.documents import Document
8
  import os
9
+ import json
10
  from langchain_groq import ChatGroq
11
  from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
12
  from langchain.prompts import PromptTemplate
 
16
  import re
17
  from googleapiclient.discovery import build
18
  from googleapiclient.errors import HttpError
19
+ from google_auth_oauthlib.flow import InstalledAppFlow
20
+ from google.auth.transport.requests import Request
21
+ from google.oauth2.credentials import Credentials
22
 
23
  # Load environment variables (optional)
24
  load_dotenv()
25
 
26
  # Hardcoded Groq API key — SECURITY: this secret is committed to the repository; revoke it and load it from an environment variable / Spaces secret instead
27
  GROQ_API_KEY = "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"
28
+ # YouTube API key (to be set in Hugging Face Spaces secrets, optional if using OAuth)
29
  YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
30
 
31
+ # Path to store OAuth credentials
32
+ CREDENTIALS_FILE = "youtube_credentials.json"
33
+ CLIENT_SECRETS_FILE = "client_secrets.json"
34
+
35
  # Custom CSS
36
  st.markdown("""
37
  <style>
 
215
  st.error(f"Error fetching transcript with youtube-transcript-api: {str(e)}")
216
  return None
217
 
218
# Function to get YouTube API credentials
def get_youtube_credentials():
    """Load (or interactively create) OAuth 2.0 credentials for the YouTube Data API.

    Lookup order:
      1. Reuse valid credentials saved in CREDENTIALS_FILE.
      2. Silently refresh an expired token that has a refresh_token, persisting
         the refreshed token back to CREDENTIALS_FILE.
      3. Fall back to the interactive browser flow using CLIENT_SECRETS_FILE
         (only works locally — Hugging Face Spaces is headless).

    Returns:
        A google.oauth2.credentials.Credentials instance, or None when no
        usable credentials can be obtained (a Streamlit warning explains why).
    """
    scopes = ['https://www.googleapis.com/auth/youtube.force-ssl']
    creds = None

    # 1. Reuse previously saved credentials when present.
    if os.path.exists(CREDENTIALS_FILE):
        creds = Credentials.from_authorized_user_file(CREDENTIALS_FILE, scopes=scopes)

    if creds and creds.valid:
        return creds

    # 2. Try a silent refresh before resorting to the interactive flow.
    if creds and creds.expired and creds.refresh_token:
        try:
            creds.refresh(Request())
            # Persist the refreshed token so the next run skips this step.
            with open(CREDENTIALS_FILE, 'w') as token_file:
                token_file.write(creds.to_json())
            return creds
        except Exception as e:  # e.g. RefreshError on a revoked token, network errors
            st.warning(f"Could not refresh stored YouTube credentials ({e}). Re-authentication is required.")
            creds = None

    # 3. Interactive OAuth flow — requires client secrets and a browser.
    if not os.path.exists(CLIENT_SECRETS_FILE):
        st.warning(
            f"{CLIENT_SECRETS_FILE} not found. To use OAuth 2.0 for YouTube Data API:\n"
            "1. Go to https://console.developers.google.com/.\n"
            "2. Create a project, enable YouTube Data API v3, and create OAuth 2.0 credentials.\n"
            "3. Download the credentials as 'client_secrets.json'.\n"
            "4. Run the app locally: pip install -r requirements.txt && streamlit run app.py\n"
            "5. Authenticate via the browser prompt to generate youtube_credentials.json.\n"
            "6. Upload youtube_credentials.json to your Hugging Face Space via the Files tab."
        )
        return None

    st.warning("Attempting to authenticate with YouTube Data API. This may not work in Hugging Face Spaces due to redirect URI limitations.")
    try:
        flow = InstalledAppFlow.from_client_secrets_file(
            CLIENT_SECRETS_FILE,
            scopes=scopes
        )
        # run_local_server opens a browser for user consent; this cannot work
        # in a headless environment such as Hugging Face Spaces, so guard it
        # instead of letting the exception crash the app.
        creds = flow.run_local_server(port=0)
    except Exception as e:
        st.warning(
            f"OAuth 2.0 browser flow failed ({e}). "
            f"Generate {CREDENTIALS_FILE} locally and upload it to the Space instead."
        )
        return None

    # Cache the newly minted credentials for subsequent runs.
    with open(CREDENTIALS_FILE, 'w') as token_file:
        token_file.write(creds.to_json())
    return creds
251
+
252
+ # Function to fetch captions using YouTube Data API (with OAuth 2.0 or API key fallback)
253
+ def fetch_youtube_captions_api(video_id, api_key=None):
254
+ # First, try OAuth 2.0 if credentials are available
255
+ creds = get_youtube_credentials()
256
+ if creds:
257
+ try:
258
+ youtube = build('youtube', 'v3', credentials=creds)
259
+ captions = youtube.captions().list(
260
+ part='snippet',
261
+ videoId=video_id
262
+ ).execute()
263
+
264
+ caption_id = None
265
+ for item in captions.get('items', []):
266
+ if item['snippet']['language'] == 'en':
267
+ caption_id = item['id']
268
+ break
269
+ elif item['snippet']['language'] in ['en-US', 'en-GB']:
270
+ caption_id = item['id']
271
+ break
272
+
273
+ if not caption_id:
274
+ st.warning("No English captions found via YouTube Data API.")
275
+ return None
276
+
277
+ # Download captions using OAuth 2.0 credentials
278
+ caption_content = youtube.captions().download(
279
+ id=caption_id,
280
+ tfmt='srt'
281
+ ).execute()
282
+
283
+ # The response is a binary string, decode it
284
+ caption_text = caption_content.decode('utf-8')
285
+ # Parse SRT format to extract text
286
+ lines = caption_text.split('\n')
287
+ text_lines = []
288
+ for line in lines:
289
+ if line.strip() and not line.isdigit() and not re.match(r'\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}', line):
290
+ text_lines.append(line.strip())
291
+
292
+ return " ".join(text_lines)
293
+
294
+ except HttpError as e:
295
+ st.error(f"Error fetching captions with YouTube Data API (OAuth 2.0): {str(e)}")
296
+ return None
297
+
298
+ # Fallback to API key if OAuth fails or credentials are not available
299
  if not api_key:
300
+ st.warning("YOUTUBE_API_KEY not set and OAuth 2.0 credentials not available. Skipping YouTube Data API fallback.")
301
  return None
302
  try:
303
  youtube = build('youtube', 'v3', developerKey=api_key)
 
324
  "English captions are available for this video but cannot be fetched with an API key alone. "
325
  "Downloading captions requires OAuth 2.0 authentication, which is not supported in Hugging Face Spaces without user interaction. "
326
  "To fetch captions:\n"
327
+ "- Follow the instructions above to generate youtube_credentials.json locally and upload it.\n"
328
  "- Or try a video with transcripts available (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ)."
329
  )
330
  return None
331
 
332
  except HttpError as e:
333
+ st.error(f"Error fetching captions with YouTube Data API (API Key): {str(e)}")
334
  return None
335
 
336
  # Function to extract subtitles using yt-dlp with cookies
 
487
 
488
  if not transcript_text:
489
  st.warning("Transcripts are disabled or unavailable. Attempting to fetch closed captions...")
490
+ st.text("Fetching Closed Captions with yt-dlp...Started...✅✅✅")
491
  transcript_text = extract_subtitles_with_ytdlp(youtube_url)
492
 
493
+ if not transcript_text:
494
  st.text("Fetching Captions via YouTube Data API...Started...✅✅✅")
495
  transcript_text = fetch_youtube_captions_api(video_id, YOUTUBE_API_KEY)
496
 
 
503
  "Solutions:\n"
504
  "- Ensure captions are enabled for the video by checking the video settings on YouTube (gear icon > Subtitles/CC > Enable if available).\n"
505
  "- Regenerate and upload a fresh cookies.txt file (see instructions above).\n"
506
+ "- Set up OAuth 2.0 credentials by following the instructions above to download captions directly.\n"
507
  "- Try a different video (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ, which has transcripts available).\n"
508
  "- Test locally to rule out Hugging Face Spaces IP restrictions by running: pip install -r requirements.txt && streamlit run app.py"
509
  )