ORromu committed on
Commit
474b8e9
·
verified ·
1 Parent(s): 6352947

Update tool.py

Browse files
Files changed (1) hide show
  1. tool.py +44 -2
tool.py CHANGED
@@ -6,14 +6,16 @@ from langchain_community.document_loaders.arxiv import ArxivLoader
6
  from langchain_community.document_loaders.pubmed import PubMedLoader
7
  from typing import Optional
8
 
 
9
  import os
10
  import tempfile
11
  import requests
12
- from urllib.parse import urlparse
13
  import pytesseract
14
  from PIL import Image
15
  import pandas as pd
16
  import uuid
 
17
 
18
  ## Simple algebra tools
19
  @tool
@@ -257,4 +259,44 @@ def analyze_excel_file(file_path: str, query: str) -> str:
257
  return result
258
 
259
  except Exception as e:
260
- return f"Error analyzing Excel file: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  from langchain_community.document_loaders.pubmed import PubMedLoader
7
  from typing import Optional
8
 
9
+
10
  import os
11
  import tempfile
12
  import requests
13
+ from urllib.parse import urlparse, parse_qs
14
  import pytesseract
15
  from PIL import Image
16
  import pandas as pd
17
  import uuid
18
+ from youtube_transcript_api import YouTubeTranscriptApi
19
 
20
  ## Simple algebra tools
21
  @tool
 
259
  return result
260
 
261
  except Exception as e:
262
+ return f"Error analyzing Excel file: {str(e)}"
263
+
264
+
265
+ ## Analyze YouTube Transcript tools
266
+
267
+ def extract_video_id(youtube_url: str) -> str | None:
268
+ """Extract the video ID from a YouTube URL.
269
+
270
+ Supports standard and shortened formats like:
271
+ - https://www.youtube.com/watch?v=VIDEO_ID
272
+ - https://youtu.be/VIDEO_ID
273
+ """
274
+ try:
275
+ parsed_url = urlparse(youtube_url)
276
+ host = parsed_url.hostname
277
+
278
+ if host in ("www.youtube.com", "youtube.com"):
279
+ return parse_qs(parsed_url.query).get("v", [None])[0]
280
+ elif host == "youtu.be":
281
+ return parsed_url.path.strip("/")
282
+ except Exception:
283
+ return None
284
+
285
+ return None
286
+
287
@tool
def get_youtube_transcript(youtube_url: str) -> str:
    """Returns the transcript of a YouTube video as plain text.

    Use this tool to extract spoken words from videos for Q&A, summarization,
    or analysis. This does not include visual or on-screen content.
    """
    # NOTE(review): the docstring wording is kept verbatim because `@tool`
    # presumably exposes it as the tool description — confirm before editing.
    video_id = extract_video_id(youtube_url)

    if video_id:
        try:
            # Each transcript entry is indexed by "text"; the pieces are
            # joined with single spaces into one string.
            segments = YouTubeTranscriptApi.get_transcript(video_id)
            pieces = [segment["text"] for segment in segments]
            return " ".join(pieces)
        except Exception as e:
            # Any failure is reported as a message string instead of raising.
            return f"Transcript unavailable: {str(e)}"

    # No usable video ID could be extracted from the URL.
    return "Invalid or unsupported YouTube URL format."