File size: 7,281 Bytes
9738813 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 |
from typing import TypedDict, Annotated
import os
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.document_loaders import WikipediaLoader, YoutubeLoader
from langchain_community.document_loaders.youtube import TranscriptFormat
from pytube import YouTube
from langgraph.graph.message import add_messages
from langchain_core.messages import AnyMessage, HumanMessage, AIMessage
from langgraph.prebuilt import ToolNode
from langchain_openai import ChatOpenAI
from langgraph.graph import START, StateGraph
from langfuse.langchain import CallbackHandler
from langgraph.prebuilt import tools_condition
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
from langchain_core.tools import tool
# Web search tool using DuckDuckGo (no API key required; returns a plain-text summary)
search_tool = DuckDuckGoSearchRun()
# Create Wikipedia search tool using WikipediaLoader
@tool
def search_wikipedia(query: str) -> str:
    """Search Wikipedia for information about a topic.

    Args:
        query: The search query or topic to look up on Wikipedia

    Returns:
        str: The Wikipedia content related to the query, or a human-readable
            error/no-results message (tools should not raise at the agent).
    """
    # Cap how much of each article we return so a very long page does not
    # flood the LLM context window.
    max_chars_per_doc = 4000
    try:
        # Load up to two Wikipedia documents for the query
        loader = WikipediaLoader(query=query, load_max_docs=2)
        docs = loader.load()
        if not docs:
            return f"No Wikipedia articles found for query: {query}"
        sections = []
        for doc in docs:
            body = doc.page_content
            # Append "..." only when the article text was actually truncated
            # (the original appended it unconditionally, which was misleading).
            if len(body) > max_chars_per_doc:
                body = body[:max_chars_per_doc] + "..."
            title = doc.metadata.get('title', 'Unknown')
            sections.append(f"Title: {title}\nContent: {body}\n\n")
        return "".join(sections)
    except Exception as e:
        return f"Error searching Wikipedia: {str(e)}"
# Create YouTube transcript analysis tool
@tool
def analyze_youtube_video(video_url: str) -> str:
    """Analyze a YouTube video by loading and processing its transcript.

    Args:
        video_url: The YouTube video URL to analyze

    Returns:
        str: The chunked transcript content of the video, or a human-readable
            error/no-transcript message (tools should not raise at the agent).
    """
    try:
        # Load the transcript in 60-second chunks; video metadata is skipped
        # (add_video_info=False) because the metadata path is flaky.
        loader = YoutubeLoader.from_youtube_url(
            video_url,
            add_video_info=False,
            transcript_format=TranscriptFormat.CHUNKS,
            chunk_size_seconds=60
        )
        docs = loader.load()
        if not docs:
            return f"No transcript available for video: {video_url}"
        content = f"Video URL: {video_url}\n"
        content += "Transcript (Chunked):\n"
        # Limit to the first 5 chunks to keep tool output bounded.
        for i, doc in enumerate(docs[:5]):
            content += f"Chunk {i+1}: {doc.page_content}\n"
        return content
    except Exception as e:
        # Return the error (not just print it) so the calling agent receives
        # a str as declared, instead of an implicit None.
        return f"Error analyzing YouTube video: {e}"
# Factory for a Langfuse CallbackHandler (a fresh handler per call, configured from env vars)
def get_langfuse_handler():
    """Return a configured Langfuse callback handler.

    Credentials and host (LANGFUSE_PUBLIC_KEY, LANGFUSE_SECRET_KEY,
    LANGFUSE_HOST) are read automatically from the environment by the
    handler itself, so no arguments are needed here.
    """
    handler = CallbackHandler()
    return handler
def build_jasper():
    """Build and compile the Jasper agent graph.

    The graph is a standard tool-calling loop: an assistant LLM node that
    may emit tool calls, a tool-execution node, and a conditional edge that
    routes back to the assistant until no further tools are requested.

    Returns:
        The compiled LangGraph runnable.
    """
    tools = [search_tool, search_wikipedia, analyze_youtube_video]

    # Requires OPENAI_API_KEY in the environment.
    llm = ChatOpenAI(
        model="gpt-4o",
        temperature=0,
        api_key=os.getenv("OPENAI_API_KEY")
    )
    # parallel_tool_calls=False keeps tool execution sequential, which is
    # simpler to trace and debug in Langfuse.
    chat_with_tools = llm.bind_tools(tools, parallel_tool_calls=False)

    # Agent state: the accumulated conversation. add_messages appends new
    # messages to the list instead of overwriting it on each update.
    class AgentState(TypedDict):
        messages: Annotated[list[AnyMessage], add_messages]

    def assistant(state: AgentState):
        # One LLM step over the current conversation history.
        return {
            "messages": [chat_with_tools.invoke(state["messages"])],
        }

    ## The graph
    builder = StateGraph(AgentState)
    # Nodes: these do the work
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))
    # Edges: these determine how control flow moves
    builder.add_edge(START, "assistant")
    # If the latest assistant message contains tool calls, route to "tools";
    # otherwise the run ends with the assistant's direct response.
    builder.add_conditional_edges(
        "assistant",
        tools_condition,
    )
    builder.add_edge("tools", "assistant")

    # Compile the graph; callbacks are supplied per-invoke, not at compile time.
    jasper = builder.compile()
    print("Langfuse tracing enabled - traces will be available in your Langfuse dashboard")
    return jasper
def run_jasper():
    """Build the Jasper agent and run it once on a sample question.

    The invocation is traced through a Langfuse callback handler, with
    metadata identifying the run in the Langfuse dashboard.
    """
    agent = build_jasper()

    # Sample task: extract a quote from a YouTube video's transcript.
    prompt = (
        "Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.\n\n"
        "What does Teal'c say in response to the question \"Isn't that hot?\""
    )
    initial_messages = [HumanMessage(content=prompt)]

    # Per-run tracing configuration.
    run_config = {
        "callbacks": [get_langfuse_handler()],
        "metadata": {
            "trace_name": "YouTube_Video_Analysis",
            "user_id": "jasper-user",
            "session_id": "jasper-agent-session"
        }
    }
    response = agent.invoke({"messages": initial_messages}, config=run_config)

    print("Jasper's Response:")
    print(response['messages'][-1].content)
# Script entry point: run the sample agent invocation when executed directly.
if __name__ == "__main__":
    run_jasper()