File size: 7,281 Bytes
9738813
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
from typing import TypedDict, Annotated
import os
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.document_loaders import WikipediaLoader, YoutubeLoader
from langchain_community.document_loaders.youtube import TranscriptFormat
from pytube import YouTube
from langgraph.graph.message import add_messages
from langchain_core.messages import AnyMessage, HumanMessage, AIMessage
from langgraph.prebuilt import ToolNode
from langchain_openai import ChatOpenAI
from langgraph.graph import START, StateGraph
from langfuse.langchain import CallbackHandler
from langgraph.prebuilt import tools_condition
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
from langchain_core.tools import tool

# Web search tool using DuckDuckGo, via the LangChain community integration;
# exposed to the agent alongside the custom tools below.
search_tool = DuckDuckGoSearchRun()

# Wikipedia search tool backed by WikipediaLoader.
@tool
def search_wikipedia(query: str) -> str:
    """Search Wikipedia for information about a topic.

    Args:
        query: The search query or topic to look up on Wikipedia

    Returns:
        str: Title/content sections for up to two matching articles
            (each truncated to a bounded length), or a human-readable
            no-results / error message.
    """
    try:
        # Load up to two Wikipedia documents for the query.
        loader = WikipediaLoader(query=query, load_max_docs=2)
        docs = loader.load()

        if not docs:
            return f"No Wikipedia articles found for query: {query}"

        # Per-article character cap so the tool output stays within a
        # reasonable context size. TODO(review): tune against the model's
        # context budget.
        max_chars = 4000

        sections = []
        for doc in docs:
            title = doc.metadata.get('title', 'Unknown')
            body = doc.page_content
            # Only show an ellipsis when text was actually truncated — the
            # previous version appended "..." unconditionally after the full
            # article text, which falsely implied truncation.
            if len(body) > max_chars:
                body = body[:max_chars] + "..."
            sections.append(f"Title: {title}\nContent: {body}\n")

        return "\n".join(sections)
    except Exception as e:
        # Tools should report failures as text to the agent, not raise.
        return f"Error searching Wikipedia: {str(e)}"

# YouTube transcript analysis tool backed by YoutubeLoader.
@tool
def analyze_youtube_video(video_url: str) -> str:
    """Analyze a YouTube video by loading and processing its transcript.

    Args:
        video_url: The YouTube video URL to analyze

    Returns:
        str: The chunked transcript content of the video (first 5 chunks),
            or a human-readable error message if the transcript cannot be
            loaded. Always returns a string so the agent loop never
            receives None.
    """
    try:
        # Load the transcript in 60-second chunks. add_video_info is off
        # because fetching title/author metadata requires an extra request
        # that frequently fails, and the transcript is what we need.
        loader = YoutubeLoader.from_youtube_url(
            video_url,
            add_video_info=False,
            transcript_format=TranscriptFormat.CHUNKS,
            chunk_size_seconds=60,
        )
        docs = loader.load()
    except Exception as e:
        # Report the failure as text instead of implicitly returning None.
        return f"Error analyzing YouTube video {video_url}: {e}"

    if not docs:
        return f"No transcript found for video: {video_url}"

    content = f"Video URL: {video_url}\n"
    content += "Transcript (Chunked):\n"
    # Limit to the first 5 chunks to keep the tool output bounded.
    for i, doc in enumerate(docs[:5]):
        content += f"Chunk {i+1}: {doc.page_content}\n"
    return content

# Factory for a Langfuse callback handler (one per run, attached via config).
def get_langfuse_handler():
    """Build a Langfuse ``CallbackHandler`` for tracing.

    Credentials and endpoint (LANGFUSE_PUBLIC_KEY, LANGFUSE_SECRET_KEY,
    LANGFUSE_HOST) are read from the environment by the handler itself.
    """
    return CallbackHandler()

def build_jasper():
    """Build and compile the Jasper agent graph.

    The graph is a ReAct-style loop: an assistant node backed by a
    tool-calling LLM, a tool-execution node, and a conditional edge that
    routes to the tools whenever the latest AI message requests one.

    Returns:
        The compiled LangGraph agent, ready for ``.invoke()``.
    """
    tools = [search_tool, search_wikipedia, analyze_youtube_video]

    # gpt-4o at temperature 0 for deterministic tool selection; the key is
    # read from the OPENAI_API_KEY environment variable.
    llm = ChatOpenAI(
        model="gpt-4o",
        temperature=0,
        api_key=os.getenv("OPENAI_API_KEY"),
    )
    # parallel_tool_calls=False forces at most one tool call per turn.
    chat_with_tools = llm.bind_tools(tools, parallel_tool_calls=False)

    # State schema: a message list that accumulates via add_messages.
    class AgentState(TypedDict):
        messages: Annotated[list[AnyMessage], add_messages]

    def assistant(state: AgentState):
        # Invoke the model on the running conversation; add_messages
        # appends the reply to the state.
        return {
            "messages": [chat_with_tools.invoke(state["messages"])],
        }

    builder = StateGraph(AgentState)

    # Nodes: the assistant (LLM) and the tool executor.
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))

    # Edges: start at the assistant; tools_condition routes to "tools" when
    # the latest message contains tool calls, otherwise ends the run; tools
    # always loop back to the assistant.
    builder.add_edge(START, "assistant")
    builder.add_conditional_edges("assistant", tools_condition)
    builder.add_edge("tools", "assistant")

    # Callbacks (e.g. Langfuse tracing) are attached per-invocation via the
    # run config — compile() takes no callback parameter.
    return builder.compile()

def run_jasper():
    """Build Jasper and run it once on a sample YouTube-transcript question."""
    jasper = build_jasper()

    question = (
        "Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.\n\n"
        "What does Teal'c say in response to the question \"Isn't that hot?\""
    )
    messages = [HumanMessage(content=question)]

    # Attach Langfuse tracing plus run-level metadata for this invocation.
    run_config = {
        "callbacks": [get_langfuse_handler()],
        "metadata": {
            "trace_name": "YouTube_Video_Analysis",
            "user_id": "jasper-user",
            "session_id": "jasper-agent-session",
        },
    }

    response = jasper.invoke({"messages": messages}, config=run_config)

    print("Jasper's Response:")
    print(response['messages'][-1].content)

# Script entry point: build and run the agent once.
if __name__ == "__main__":
    run_jasper()