Spaces:
Configuration error
Configuration error
updates
Browse files- agent.py +37 -19
- requirements.txt +2 -0
agent.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import os
|
|
|
|
| 2 |
|
| 3 |
-
from langchain_community.document_loaders import WikipediaLoader
|
| 4 |
|
| 5 |
# --- LangChain / LangGraph ---
|
| 6 |
from langchain_community.tools.tavily_search import TavilySearchResults
|
|
@@ -19,28 +20,20 @@ from langgraph.graph.message import add_messages
|
|
| 19 |
from langgraph.prebuilt import ToolNode, tools_condition
|
| 20 |
|
| 21 |
|
| 22 |
-
@tool
|
| 23 |
-
def search_web_sources(query: str) -> dict:
|
| 24 |
-
"""
|
| 25 |
-
Perform a web search using Tavily and return up to 3 relevant documents.
|
| 26 |
-
This tool is useful for answering research-based queries that require
|
| 27 |
-
up-to-date information from trusted sources.
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
str: Formatted web search results with metadata and content.
|
| 34 |
-
"""
|
| 35 |
-
tavily_tool = TavilySearchResults(max_results=3)
|
| 36 |
-
search_docs = tavily_tool.invoke({"query": query})
|
| 37 |
-
formatted_search_docs = "\n\n---\n\n".join(
|
| 38 |
[
|
| 39 |
-
f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"
|
| 40 |
for doc in search_docs
|
| 41 |
]
|
| 42 |
)
|
| 43 |
-
return {"web_results":
|
| 44 |
|
| 45 |
|
| 46 |
@tool
|
|
@@ -63,6 +56,26 @@ def search_wikipedia(query: str) -> dict:
|
|
| 63 |
return {"wiki_results": f"Error fetching Wikipedia article: {e}"}
|
| 64 |
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
@tool
|
| 67 |
def run_python_code(code: str) -> str:
|
| 68 |
"""Execute Python code and return the result.
|
|
@@ -96,7 +109,12 @@ Your response must always begin with: FINAL ANSWER:
|
|
| 96 |
def build_agent_graph(provider: str = "groq"):
|
| 97 |
|
| 98 |
# Define toolset
|
| 99 |
-
tools = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
# Instantiate LLM
|
| 102 |
os.environ["GROQ_API_KEY"]
|
|
|
|
| 1 |
import os
|
| 2 |
+
from typing import Annotated
|
| 3 |
|
| 4 |
+
from langchain_community.document_loaders import WikipediaLoader, YoutubeLoader
|
| 5 |
|
| 6 |
# --- LangChain / LangGraph ---
|
| 7 |
from langchain_community.tools.tavily_search import TavilySearchResults
|
|
|
|
| 20 |
from langgraph.prebuilt import ToolNode, tools_condition
|
| 21 |
|
| 22 |
|
| 23 |
+
def _render_search_hit(doc) -> str:
    """Render a single search hit as one ``<Document ...>`` text block."""
    if isinstance(doc, dict):
        # Tavily hits are plain dicts: the link is under "url" and the
        # snippet under "content"; there is no Document wrapper.
        source = doc.get("url", doc.get("source", ""))
        page = doc.get("page", "")
        content = doc.get("content", "")
    else:
        # Still accept Document-like objects (metadata / page_content) so a
        # backend that returns them keeps working.
        metadata = getattr(doc, "metadata", None) or {}
        source = metadata.get("source", "")
        page = metadata.get("page", "")
        content = getattr(doc, "page_content", str(doc))
    return f'<Document source="{source}" page="{page}">\n{content}\n</Document>'


@tool("search_web_sources")
def search_web_sources(query: Annotated[str, "Search query string"]) -> dict:
    """Performs a web search and returns up to 3 formatted documents with content and source."""
    # NOTE: the docstring above is used as the tool description, so it is
    # left untouched; implementation notes live in comments instead.
    #
    # Returns a dict with a single "web_results" key holding the formatted
    # hits joined by a "---" separator line.
    # Raises EnvironmentError when TAVILY_API_KEY is missing.
    if not os.environ.get("TAVILY_API_KEY"):
        raise EnvironmentError("TAVILY_API_KEY is not set in environment variables.")

    search_docs = TavilySearchResults(max_results=3).invoke({"query": query})

    # NOTE(review): some versions of the Tavily tool hand back a plain string
    # (the error text) instead of a list of hits - pass it through rather
    # than iterating it character by character.
    if isinstance(search_docs, str):
        return {"web_results": search_docs}

    formatted = "\n\n---\n\n".join(_render_search_hit(doc) for doc in search_docs)
    return {"web_results": formatted}
|
| 37 |
|
| 38 |
|
| 39 |
@tool
|
|
|
|
| 56 |
return {"wiki_results": f"Error fetching Wikipedia article: {e}"}
|
| 57 |
|
| 58 |
|
| 59 |
+
@tool
def extract_youtube_transcript(video_url: str) -> dict:
    """Extract transcript from a YouTube video given its URL using LangChain's YouTubeLoader."""
    # NOTE: the docstring above is used as the tool description, so it is
    # left untouched; implementation notes live in comments instead.
    #
    # Returns a dict with a single "transcript_results" key holding either
    # the formatted transcript(s), "No transcript found.", or an error text.
    try:
        # The loader class is spelled ``YoutubeLoader`` and its constructor
        # expects a bare video id; ``from_youtube_url`` extracts the id from
        # a full URL, which is what this tool receives.
        docs = YoutubeLoader.from_youtube_url(video_url).load()
    except Exception as e:
        # Deliberate best-effort: report the failure as the tool result
        # instead of raising, so the caller always gets a dict back.
        return {"transcript_results": f"Error fetching YouTube transcript: {e}"}

    if not docs:
        return {"transcript_results": "No transcript found."}

    formatted_docs = "\n\n---\n\n".join(
        f'<YouTubeTranscript url="{video_url}">\n{doc.page_content}\n</YouTubeTranscript>'
        for doc in docs
    )
    return {"transcript_results": formatted_docs}
|
| 77 |
+
|
| 78 |
+
|
| 79 |
@tool
|
| 80 |
def run_python_code(code: str) -> str:
|
| 81 |
"""Execute Python code and return the result.
|
|
|
|
| 109 |
def build_agent_graph(provider: str = "groq"):
|
| 110 |
|
| 111 |
# Define toolset
|
| 112 |
+
tools = [
|
| 113 |
+
search_web_sources,
|
| 114 |
+
search_wikipedia,
|
| 115 |
+
extract_youtube_transcript,
|
| 116 |
+
run_python_code,
|
| 117 |
+
]
|
| 118 |
|
| 119 |
# Instantiate LLM
|
| 120 |
os.environ["GROQ_API_KEY"]
|
requirements.txt
CHANGED
|
@@ -9,3 +9,5 @@ langchain-experimental
|
|
| 9 |
langchain-tavily
|
| 10 |
langgraph
|
| 11 |
tavily-python
|
|
|
|
|
|
|
|
|
| 9 |
langchain-tavily
|
| 10 |
langgraph
|
| 11 |
tavily-python
|
| 12 |
+
wikipedia
|
| 13 |
+
youtube-transcript-api
|