Final_Assignment_Agents

Sleeping

App Files Files Community

ernani committed on May 1, 2025

Commit

2d9eaee

1 Parent(s): 30c3969

Fix YouTube tool: when there are network issues, it can still read the offline transcripts cached in temp_youtube

Browse files

Files changed (4) hide show

manage_agents.py +0 -9
requirements.txt +1 -2
temp_youtube/1htKBjuUWec_transcript.txt +7 -0
tools.py +32 -21

manage_agents.py CHANGED Viewed

@@ -1,7 +1,6 @@
 from typing import Dict, List, Optional, Tuple
 from langchain.agents import AgentExecutor
 from langchain_openai import ChatOpenAI
-from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import LLMChain
 from langchain.prompts import PromptTemplate
@@ -291,14 +290,6 @@ class StateGraphAgent:
     def __init__(self):
         self.llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")
-        # llm = HuggingFaceEndpoint(
-        #     repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
-        #     #repo_id="meta-llama/Llama-3.3-70B-Instruct",
-        #     huggingfacehub_api_token=HF_TOKEN,
-        # )
-        # self.llm = ChatHuggingFace(llm=llm, verbose=True)
         # Initialize tools
         self.wikipedia_tool = WikipediaTool()

 from typing import Dict, List, Optional, Tuple
 from langchain.agents import AgentExecutor
 from langchain_openai import ChatOpenAI
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import LLMChain
 from langchain.prompts import PromptTemplate
     def __init__(self):
         self.llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")
         # Initialize tools
         self.wikipedia_tool = WikipediaTool()

requirements.txt CHANGED Viewed

@@ -4,8 +4,7 @@ duckduckgo-search>=3.0.0
 gradio>=4.0.0
 langchain>=0.1.0
 langchain_community>=0.1.0
-langchain-core
-langchain-huggingface
 langchain_openai>=0.1.0
 langgraph
 librosa>=0.10.0

 gradio>=4.0.0
 langchain>=0.1.0
 langchain_community>=0.1.0
+langchain_core>=0.1.0
 langchain_openai>=0.1.0
 langgraph
 librosa>=0.10.0

temp_youtube/1htKBjuUWec_transcript.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+[0.03s]: Wow this coffee's great I was just
+[3.84s]: thinking that
+[5.42s]: yeah is that cinnamon chicory
+[17.72s]: tea oak
+[21.54s]: [Music]
+[24.68s]: isn't that hot
+[26.72s]: extremely

tools.py CHANGED Viewed

@@ -187,29 +187,40 @@ class YouTubeVideoTool(BaseContentTool):
                 "question_context": question
             }
-            # Try multiple methods to get transcript
             transcript_text = None
-            # Method 1: Try YouTube Transcript API directly
-            transcript_text = self._get_transcript_with_api(video_id)
-            # Method 2: Try YoutubeLoader if Method 1 failed
-            if not transcript_text:
-                try:
-                    loader = YoutubeLoader.from_youtube_url(
-                        clean_url,
-                        add_video_info=False,
-                        language=["en"]
-                    )
-                    documents = loader.load()
-                    if documents:
-                        transcript_text = documents[0].page_content
-                        # Add video info from metadata if available
-                        if documents[0].metadata:
-                            metadata.update(documents[0].metadata)
-                except Exception as e:
-                    pass
             # If we got transcript text, create and return document
             if transcript_text:

                 "question_context": question
             }
+            # Prepare transcript path
+            temp_dir = "temp_youtube"
+            os.makedirs(temp_dir, exist_ok=True)
+            transcript_path = os.path.join(temp_dir, f"{video_id}_transcript.txt")
             transcript_text = None
+            # If transcript file exists, read it and skip fetching
+            if os.path.exists(transcript_path):
+                with open(transcript_path, "r", encoding="utf-8") as f:
+                    transcript_text = f.read()
+            else:
+                # Try multiple methods to get transcript
+                # Method 1: Try YouTube Transcript API directly
+                transcript_text = self._get_transcript_with_api(video_id)
+                # Method 2: Try YoutubeLoader if Method 1 failed
+                if not transcript_text:
+                    try:
+                        loader = YoutubeLoader.from_youtube_url(
+                            clean_url,
+                            add_video_info=False,
+                            language=["en"]
+                        )
+                        documents = loader.load()
+                        if documents:
+                            transcript_text = documents[0].page_content
+                            # Add video info from metadata if available
+                            if documents[0].metadata:
+                                metadata.update(documents[0].metadata)
+                    except Exception as e:
+                        pass
+                # Save transcript to temp_youtube directory if available
+                if transcript_text:
+                    with open(transcript_path, "w", encoding="utf-8") as f:
+                        f.write(transcript_text)
             # If we got transcript text, create and return document
             if transcript_text: