Spaces:
Sleeping
Sleeping
import logging
import os
import re
from urllib.parse import parse_qs, urlparse

from langchain_community.agent_toolkits.load_tools import load_tools
from langchain_google_community import GooglePlacesTool
from smolagents.tools import Tool, tool
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.proxies import WebshareProxyConfig
# Google Places search, adapted from its LangChain implementation into a
# smolagents Tool.
google_map_tool = Tool.from_langchain(GooglePlacesTool())

from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

# Wikipedia lookup; the API wrapper is configured with top_k_results=2.
wikipedia_tool = Tool.from_langchain(
    WikipediaQueryRun(
        api_wrapper=WikipediaAPIWrapper(top_k_results=2),
    )
)

# One smolagents Tool per LangChain tool returned by load_tools(["arxiv"]).
arxiv_tools = list(map(Tool.from_langchain, load_tools(["arxiv"])))

# Tool groupings exposed by this module.
community_tools = [google_map_tool]
search_tools = [wikipedia_tool] + arxiv_tools
def _extract_video_id(video_url: str) -> str:
    """Return the video id contained in a YouTube URL.

    Handles ``watch?v=<id>`` URLs (with any additional query parameters, in
    any order), ``youtu.be/<id>`` short links and ``/embed/<id>``,
    ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>`` paths.

    Raises:
        ValueError: If no id can be found, or the candidate contains
            characters other than ASCII letters, digits, ``-`` and ``_``.
    """
    parsed = urlparse(video_url.strip())
    candidate = ""

    ids_in_query = parse_qs(parsed.query).get("v")
    if ids_in_query:
        candidate = ids_in_query[0]
    else:
        host = (parsed.hostname or "").lower()
        segments = [part for part in parsed.path.split("/") if part]
        if host in ("youtu.be", "www.youtu.be") and segments:
            candidate = segments[0]
        elif len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            candidate = segments[1]

    # The id is later used to build a file path, so reject anything that
    # could escape the transcripts directory (e.g. "../").
    if not re.fullmatch(r"[A-Za-z0-9_-]+", candidate):
        raise ValueError(f"Could not extract a YouTube video id from {video_url!r}")
    return candidate


def _fetch_remote_transcript(video_id: str) -> str:
    """Fetch the transcript for *video_id* through youtube-transcript-api.

    A Webshare proxy is used only when both WEBSHARE_PROXY_USERNAME and
    WEBSHARE_PROXY_PASSWORD are set in the environment.
    """
    proxy_username = os.getenv("WEBSHARE_PROXY_USERNAME")
    proxy_password = os.getenv("WEBSHARE_PROXY_PASSWORD")

    if proxy_username and proxy_password:
        proxy_config = WebshareProxyConfig(
            proxy_username=proxy_username,
            proxy_password=proxy_password,
        )
        ytt_api = YouTubeTranscriptApi(proxy_config=proxy_config)
    else:
        ytt_api = YouTubeTranscriptApi()

    fetched_transcript = ytt_api.fetch(video_id)

    # Snippets are objects exposing ``.text`` in the instance-based API used
    # here; plain dicts are still accepted in case a dict-shaped transcript
    # is returned.
    return "".join(
        f"{snippet.text if hasattr(snippet, 'text') else snippet['text']}\n"
        for snippet in fetched_transcript
    )


def _load_manual_transcript(video_id: str) -> str:
    """Read ``transcripts/<video_id>.txt`` located next to this file."""
    curr_dir = os.path.dirname(os.path.abspath(__file__))
    transcript_path = os.path.join(curr_dir, "transcripts", f"{video_id}.txt")
    with open(transcript_path, "r", encoding="utf-8") as f:
        return f.read()


def get_youtube_transcript_from_url(video_url: str) -> str:
    """
    Get the transcript of a YouTube video using proxy configuration

    The transcript is first requested from YouTube (through a Webshare proxy
    when credentials are present in the environment). If that fails for any
    reason, the failure is logged and a manually saved transcript is read
    from ``transcripts/<video_id>.txt`` next to this file instead.

    Args:
        video_url: The URL of the YouTube video (e.g. https://www.youtube.com/watch?v=dQw4w9WgXcQ)

    Returns:
        The transcript of the YouTube video as a string

    Raises:
        ValueError: If no video id can be extracted from ``video_url``.
        OSError: If the remote fetch fails and no manual transcript file can
            be read (typically ``FileNotFoundError``).
    """
    video_id = _extract_video_id(video_url)
    try:
        return _fetch_remote_transcript(video_id)
    except Exception as exc:
        # Deliberate best-effort: any remote failure falls back to the local
        # copy, but the cause is recorded instead of being silently dropped.
        logging.getLogger(__name__).warning(
            "Fetching transcript for video %s failed (%r); "
            "falling back to manual transcript",
            video_id,
            exc,
        )
        return _load_manual_transcript(video_id)