File size: 7,281 Bytes
9738813 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 |
from typing import TypedDict, Annotated
import os
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.document_loaders import WikipediaLoader, YoutubeLoader
from langchain_community.document_loaders.youtube import TranscriptFormat
from pytube import YouTube
from langgraph.graph.message import add_messages
from langchain_core.messages import AnyMessage, HumanMessage, AIMessage
from langgraph.prebuilt import ToolNode
from langchain_openai import ChatOpenAI
from langgraph.graph import START, StateGraph
from langfuse.langchain import CallbackHandler
from langgraph.prebuilt import tools_condition
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
from langchain_core.tools import tool
# Web search tool using DuckDuckGo (no API key required; returns a plain-text summary)
search_tool = DuckDuckGoSearchRun()
# Create Wikipedia search tool using WikipediaLoader
@tool
def search_wikipedia(query: str) -> str:
    """Search Wikipedia for information about a topic.

    Args:
        query: The search query or topic to look up on Wikipedia

    Returns:
        str: The Wikipedia content related to the query, or a human-readable
            error/no-results message (tools should not raise at the agent).
    """
    # Cap how much of each article we return so a very long page does not
    # flood the LLM context window.
    max_chars_per_doc = 4000
    try:
        # Load up to two Wikipedia documents for the query
        loader = WikipediaLoader(query=query, load_max_docs=2)
        docs = loader.load()
        if not docs:
            return f"No Wikipedia articles found for query: {query}"
        sections = []
        for doc in docs:
            body = doc.page_content
            # Append "..." only when the article text was actually truncated
            # (the original appended it unconditionally, which was misleading).
            if len(body) > max_chars_per_doc:
                body = body[:max_chars_per_doc] + "..."
            title = doc.metadata.get('title', 'Unknown')
            sections.append(f"Title: {title}\nContent: {body}\n\n")
        return "".join(sections)
    except Exception as e:
        return f"Error searching Wikipedia: {str(e)}"
# Create YouTube transcript analysis tool
@tool
def analyze_youtube_video(video_url: str) -> str:
    """Analyze a YouTube video by loading and processing its transcript.

    Args:
        video_url: The YouTube video URL to analyze

    Returns:
        str: The chunked transcript content of the video, or a human-readable
            error/no-transcript message (tools should not raise at the agent).
    """
    try:
        # Load the transcript in 60-second chunks; video metadata is skipped
        # (add_video_info=False) because the metadata path is flaky.
        loader = YoutubeLoader.from_youtube_url(
            video_url,
            add_video_info=False,
            transcript_format=TranscriptFormat.CHUNKS,
            chunk_size_seconds=60
        )
        docs = loader.load()
        if not docs:
            return f"No transcript available for video: {video_url}"
        content = f"Video URL: {video_url}\n"
        content += "Transcript (Chunked):\n"
        # Limit to the first 5 chunks to keep tool output bounded.
        for i, doc in enumerate(docs[:5]):
            content += f"Chunk {i+1}: {doc.page_content}\n"
        return content
    except Exception as e:
        # Return the error (not just print it) so the calling agent receives
        # a str as declared, instead of an implicit None.
        return f"Error analyzing YouTube video: {e}"
# Factory for a Langfuse CallbackHandler (a fresh handler per call, configured from env vars)
def get_langfuse_handler():
    """Return a configured Langfuse callback handler.

    Credentials and host (LANGFUSE_PUBLIC_KEY, LANGFUSE_SECRET_KEY,
    LANGFUSE_HOST) are read automatically from the environment by the
    handler itself, so no arguments are needed here.
    """
    handler = CallbackHandler()
    return handler
def build_jasper():
    """Build and compile the Jasper agent graph.

    The graph is a standard tool-calling loop: an assistant LLM node that
    may emit tool calls, a tool-execution node, and a conditional edge that
    routes back to the assistant until no further tools are requested.

    Returns:
        The compiled LangGraph runnable.
    """
    tools = [search_tool, search_wikipedia, analyze_youtube_video]

    # Requires OPENAI_API_KEY in the environment.
    llm = ChatOpenAI(
        model="gpt-4o",
        temperature=0,
        api_key=os.getenv("OPENAI_API_KEY")
    )
    # parallel_tool_calls=False keeps tool execution sequential, which is
    # simpler to trace and debug in Langfuse.
    chat_with_tools = llm.bind_tools(tools, parallel_tool_calls=False)

    # Agent state: the accumulated conversation. add_messages appends new
    # messages to the list instead of overwriting it on each update.
    class AgentState(TypedDict):
        messages: Annotated[list[AnyMessage], add_messages]

    def assistant(state: AgentState):
        # One LLM step over the current conversation history.
        return {
            "messages": [chat_with_tools.invoke(state["messages"])],
        }

    ## The graph
    builder = StateGraph(AgentState)
    # Nodes: these do the work
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))
    # Edges: these determine how control flow moves
    builder.add_edge(START, "assistant")
    # If the latest assistant message contains tool calls, route to "tools";
    # otherwise the run ends with the assistant's direct response.
    builder.add_conditional_edges(
        "assistant",
        tools_condition,
    )
    builder.add_edge("tools", "assistant")

    # Compile the graph; callbacks are supplied per-invoke, not at compile time.
    jasper = builder.compile()
    print("Langfuse tracing enabled - traces will be available in your Langfuse dashboard")
    return jasper
def run_jasper():
    """Build the Jasper agent and run it once on a sample question.

    The invocation is traced through a Langfuse callback handler, with
    metadata identifying the run in the Langfuse dashboard.
    """
    agent = build_jasper()

    # Sample task: extract a quote from a YouTube video's transcript.
    prompt = (
        "Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.\n\n"
        "What does Teal'c say in response to the question \"Isn't that hot?\""
    )
    initial_messages = [HumanMessage(content=prompt)]

    # Per-run tracing configuration.
    run_config = {
        "callbacks": [get_langfuse_handler()],
        "metadata": {
            "trace_name": "YouTube_Video_Analysis",
            "user_id": "jasper-user",
            "session_id": "jasper-agent-session"
        }
    }
    response = agent.invoke({"messages": initial_messages}, config=run_config)

    print("Jasper's Response:")
    print(response['messages'][-1].content)
# Script entry point: run the sample agent invocation when executed directly.
if __name__ == "__main__":
    run_jasper()