Spaces:
Sleeping
Sleeping
ernani
committed on
Commit
·
2d9eaee
1
Parent(s):
30c3969
Fix YouTube tool: when there are network issues, it can still read the offline (cached) transcripts
Browse files
- manage_agents.py +0 -9
- requirements.txt +1 -2
- temp_youtube/1htKBjuUWec_transcript.txt +7 -0
- tools.py +32 -21
manage_agents.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
from typing import Dict, List, Optional, Tuple
|
| 2 |
from langchain.agents import AgentExecutor
|
| 3 |
from langchain_openai import ChatOpenAI
|
| 4 |
-
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
|
| 5 |
from langchain.memory import ConversationBufferMemory
|
| 6 |
from langchain.chains import LLMChain
|
| 7 |
from langchain.prompts import PromptTemplate
|
|
@@ -291,14 +290,6 @@ class StateGraphAgent:
|
|
| 291 |
|
| 292 |
def __init__(self):
|
| 293 |
self.llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")
|
| 294 |
-
# llm = HuggingFaceEndpoint(
|
| 295 |
-
# repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
|
| 296 |
-
# #repo_id="meta-llama/Llama-3.3-70B-Instruct",
|
| 297 |
-
|
| 298 |
-
# huggingfacehub_api_token=HF_TOKEN,
|
| 299 |
-
# )
|
| 300 |
-
|
| 301 |
-
# self.llm = ChatHuggingFace(llm=llm, verbose=True)
|
| 302 |
|
| 303 |
# Initialize tools
|
| 304 |
self.wikipedia_tool = WikipediaTool()
|
|
|
|
| 1 |
from typing import Dict, List, Optional, Tuple
|
| 2 |
from langchain.agents import AgentExecutor
|
| 3 |
from langchain_openai import ChatOpenAI
|
|
|
|
| 4 |
from langchain.memory import ConversationBufferMemory
|
| 5 |
from langchain.chains import LLMChain
|
| 6 |
from langchain.prompts import PromptTemplate
|
|
|
|
| 290 |
|
| 291 |
def __init__(self):
|
| 292 |
self.llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
|
| 294 |
# Initialize tools
|
| 295 |
self.wikipedia_tool = WikipediaTool()
|
requirements.txt
CHANGED
|
@@ -4,8 +4,7 @@ duckduckgo-search>=3.0.0
|
|
| 4 |
gradio>=4.0.0
|
| 5 |
langchain>=0.1.0
|
| 6 |
langchain_community>=0.1.0
|
| 7 |
-
|
| 8 |
-
langchain-huggingface
|
| 9 |
langchain_openai>=0.1.0
|
| 10 |
langgraph
|
| 11 |
librosa>=0.10.0
|
|
|
|
| 4 |
gradio>=4.0.0
|
| 5 |
langchain>=0.1.0
|
| 6 |
langchain_community>=0.1.0
|
| 7 |
+
langchain_core>=0.1.0
|
|
|
|
| 8 |
langchain_openai>=0.1.0
|
| 9 |
langgraph
|
| 10 |
librosa>=0.10.0
|
temp_youtube/1htKBjuUWec_transcript.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[0.03s]: Wow this coffee's great I was just
|
| 2 |
+
[3.84s]: thinking that
|
| 3 |
+
[5.42s]: yeah is that cinnamon chicory
|
| 4 |
+
[17.72s]: tea oak
|
| 5 |
+
[21.54s]: [Music]
|
| 6 |
+
[24.68s]: isn't that hot
|
| 7 |
+
[26.72s]: extremely
|
tools.py
CHANGED
|
@@ -187,29 +187,40 @@ class YouTubeVideoTool(BaseContentTool):
|
|
| 187 |
"question_context": question
|
| 188 |
}
|
| 189 |
|
| 190 |
-
#
|
|
|
|
|
|
|
|
|
|
| 191 |
transcript_text = None
|
| 192 |
|
| 193 |
-
#
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
|
| 214 |
# If we got transcript text, create and return document
|
| 215 |
if transcript_text:
|
|
|
|
| 187 |
"question_context": question
|
| 188 |
}
|
| 189 |
|
| 190 |
+
# Prepare transcript path
|
| 191 |
+
temp_dir = "temp_youtube"
|
| 192 |
+
os.makedirs(temp_dir, exist_ok=True)
|
| 193 |
+
transcript_path = os.path.join(temp_dir, f"{video_id}_transcript.txt")
|
| 194 |
transcript_text = None
|
| 195 |
|
| 196 |
+
# If transcript file exists, read it and skip fetching
|
| 197 |
+
if os.path.exists(transcript_path):
|
| 198 |
+
with open(transcript_path, "r", encoding="utf-8") as f:
|
| 199 |
+
transcript_text = f.read()
|
| 200 |
+
else:
|
| 201 |
+
# Try multiple methods to get transcript
|
| 202 |
+
# Method 1: Try YouTube Transcript API directly
|
| 203 |
+
transcript_text = self._get_transcript_with_api(video_id)
|
| 204 |
+
# Method 2: Try YoutubeLoader if Method 1 failed
|
| 205 |
+
if not transcript_text:
|
| 206 |
+
try:
|
| 207 |
+
loader = YoutubeLoader.from_youtube_url(
|
| 208 |
+
clean_url,
|
| 209 |
+
add_video_info=False,
|
| 210 |
+
language=["en"]
|
| 211 |
+
)
|
| 212 |
+
documents = loader.load()
|
| 213 |
+
if documents:
|
| 214 |
+
transcript_text = documents[0].page_content
|
| 215 |
+
# Add video info from metadata if available
|
| 216 |
+
if documents[0].metadata:
|
| 217 |
+
metadata.update(documents[0].metadata)
|
| 218 |
+
except Exception as e:
|
| 219 |
+
pass
|
| 220 |
+
# Save transcript to temp_youtube directory if available
|
| 221 |
+
if transcript_text:
|
| 222 |
+
with open(transcript_path, "w", encoding="utf-8") as f:
|
| 223 |
+
f.write(transcript_text)
|
| 224 |
|
| 225 |
# If we got transcript text, create and return document
|
| 226 |
if transcript_text:
|