mafzaal commited on
Commit
2dad3d9
·
1 Parent(s): fbc22d4

Implement LangGraph Agent for Research with Document Retrieval and Search Tools

Browse files

- Added `agent.py` to define the Research Agent's state and processing logic.
- Implemented message handling, context retrieval from documents, and model invocation.
- Created a document search tool to query uploaded documents.
- Developed a function to convert user inputs into the agent's expected format.
- Introduced a search tools module in `search_tools.py` to integrate Tavily, DuckDuckGo, and Arxiv search functionalities.
- Established a comprehensive agent chain that includes retrieval, processing, and tool execution.

handlers/chainlit_handlers.py CHANGED
@@ -7,276 +7,16 @@ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
7
  from langchain_qdrant import QdrantVectorStore
8
  from qdrant_client import QdrantClient
9
  from qdrant_client.models import Distance, VectorParams
10
- from langchain import hub
11
- from langchain.agents import create_openai_functions_agent, AgentExecutor
12
- # Update memory import to use the newer approach
13
- from langchain_core.runnables.history import RunnableWithMessageHistory
14
- from langchain_core.chat_history import BaseChatMessageHistory
15
- from langchain_core.prompts import MessagesPlaceholder
16
 
17
  from utils.file_processor import process_file
18
  from models.rag import LangChainRAG
19
- from models.research_tools import ResearchToolkit, RAGQueryInput
 
 
20
  import config
21
- from langchain_community.tools.tavily_search import TavilySearchResults
22
- from langchain_community.tools.arxiv.tool import ArxivQueryRun
23
- from typing import TypedDict, Annotated, Dict, Any, Literal, Union, cast, List, Optional
24
- from langgraph.graph.message import add_messages
25
- import operator
26
- from langchain_core.messages import BaseMessage, SystemMessage
27
- from langgraph.graph import StateGraph, END
28
- from langchain_core.messages import HumanMessage
29
- from langchain_community.tools import DuckDuckGoSearchResults
30
- from langchain_core.documents import Document
31
- from langchain_core.tools import Tool
32
-
33
- tavily_tool = TavilySearchResults(max_results=5)
34
- duckduckgo_tool = DuckDuckGoSearchResults(max_results=5)
35
- arxiv_tool = ArxivQueryRun()
36
-
37
- tool_belt = [
38
- tavily_tool,
39
- duckduckgo_tool,
40
- arxiv_tool,
41
- ]
42
-
43
- model = ChatOpenAI(model="gpt-4o", temperature=0)
44
- model = model.bind_tools(tool_belt)
45
-
46
- class ResearchAgentState(TypedDict):
47
- """
48
- State definition for the Research Agent using LangGraph.
49
-
50
- Attributes:
51
- messages: List of messages in the conversation
52
- context: Additional context information from RAG retrievals
53
- documents: Optional list of Document objects from uploaded files
54
- """
55
- messages: Annotated[list[BaseMessage], add_messages]
56
- context: str
57
- documents: Optional[List[Document]]
58
-
59
-
60
- from langgraph.prebuilt import ToolNode
61
-
62
-
63
- def call_model(state: Dict[str, Any]) -> Dict[str, list[BaseMessage]]:
64
- """
65
- Process the current state through the language model.
66
-
67
- Args:
68
- state: Current state containing messages and context
69
-
70
- Returns:
71
- Updated state with model's response added to messages
72
- """
73
- try:
74
- messages = state["messages"]
75
- context = state.get("context", "")
76
-
77
- # Add context from documents if available
78
- if context:
79
- # Insert system message with context before the latest user message
80
- context_message = SystemMessage(content=f"Use the following information from uploaded documents to enhance your response if relevant:\n\n{context}")
81
-
82
- # Find the position of the last user message
83
- for i in range(len(messages)-1, -1, -1):
84
- if isinstance(messages[i], HumanMessage):
85
- # Insert context right after the last user message
86
- enhanced_messages = messages[:i+1] + [context_message] + messages[i+1:]
87
- break
88
- else:
89
- # No user message found, just append context
90
- enhanced_messages = messages + [context_message]
91
- else:
92
- enhanced_messages = messages
93
-
94
- # Get response from the model
95
- response = model.invoke(enhanced_messages)
96
- return {"messages": [response]}
97
- except Exception as e:
98
- # Handle exceptions gracefully
99
- error_msg = f"Error calling model: {str(e)}"
100
- print(error_msg) # Log the error
101
- # Return a fallback response
102
- return {"messages": [HumanMessage(content=error_msg)]}
103
-
104
-
105
- def should_continue(state: Dict[str, Any]) -> Union[Literal["action"], Literal[END]]:
106
- """
107
- Determine if the agent should continue processing or end.
108
-
109
- Args:
110
- state: Current state containing messages and context
111
-
112
- Returns:
113
- "action" if tool calls are present, otherwise END
114
- """
115
- last_message = state["messages"][-1]
116
-
117
- if last_message.tool_calls:
118
- return "action"
119
-
120
- return END
121
-
122
-
123
- def convert_inputs(input_object: Dict[str, str]) -> Dict[str, list[BaseMessage]]:
124
- """
125
- Convert user input into the format expected by the agent.
126
-
127
- Args:
128
- input_object: Dictionary containing the user's question
129
-
130
- Returns:
131
- Formatted input state for the agent
132
- """
133
- return {"messages": [HumanMessage(content=input_object["question"])]}
134
-
135
-
136
- def parse_output(input_state: Dict[str, Any]) -> str:
137
- """
138
- Extract the final response from the agent's state.
139
-
140
- Args:
141
- input_state: The final state of the agent
142
-
143
- Returns:
144
- The content of the last message
145
- """
146
- try:
147
- return cast(str, input_state["messages"][-1].content)
148
- except (IndexError, KeyError, AttributeError) as e:
149
- # Handle potential errors when accessing the output
150
- error_msg = f"Error parsing output: {str(e)}"
151
- print(error_msg) # Log the error
152
- return "I encountered an error while processing your request."
153
-
154
-
155
- def build_agent_chain() -> Any:
156
- """
157
- Constructs and returns the research agent execution chain.
158
-
159
- The chain consists of:
160
- 1. A retrieval node that gets context from documents
161
- 2. An agent node that processes messages
162
- 3. A tool node that executes tools when called
163
-
164
- Returns:
165
- Compiled agent chain ready for execution
166
- """
167
- # Create document search tool
168
- doc_search_tool = Tool(
169
- name="DocumentSearch",
170
- description="Search within the user's uploaded document. Use this tool when you need information from the specific document that was uploaded.",
171
- func=document_search_tool,
172
- args_schema=RAGQueryInput
173
- )
174
-
175
- # Add document search tool to the tool belt if we have upload capability
176
- tools = tool_belt.copy()
177
- tools.append(doc_search_tool)
178
-
179
- # Create a node for tool execution
180
- tool_node = ToolNode(tools)
181
-
182
- # Initialize the graph with our state type
183
- uncompiled_graph = StateGraph(ResearchAgentState)
184
-
185
- # Add nodes
186
- uncompiled_graph.add_node("retrieve", retrieve_from_documents)
187
- uncompiled_graph.add_node("agent", call_model)
188
- uncompiled_graph.add_node("action", tool_node)
189
-
190
- # Set the entry point to retrieve context first
191
- uncompiled_graph.set_entry_point("retrieve")
192
-
193
- # Add edges
194
- uncompiled_graph.add_edge("retrieve", "agent")
195
-
196
- # Add conditional edges from agent
197
- uncompiled_graph.add_conditional_edges(
198
- "agent",
199
- should_continue,
200
- {
201
- "action": "action",
202
- END: END
203
- }
204
- )
205
-
206
- # Complete the loop
207
- uncompiled_graph.add_edge("action", "agent")
208
-
209
- # Compile the graph
210
- compiled_graph = uncompiled_graph.compile()
211
-
212
- # Create the full chain
213
- agent_chain = convert_inputs | compiled_graph
214
- return agent_chain
215
-
216
-
217
- def retrieve_from_documents(state: Dict[str, Any]) -> Dict[str, str]:
218
- """
219
- Retrieve relevant context from uploaded documents based on the user query.
220
-
221
- Args:
222
- state: Current state containing messages and optional documents
223
-
224
- Returns:
225
- Updated state with context from document retrieval
226
- """
227
- # Get the last user message
228
- for message in reversed(state["messages"]):
229
- if isinstance(message, HumanMessage):
230
- query = message.content
231
- break
232
- else:
233
- # No user message found
234
- return {"context": ""}
235
-
236
- # Skip if no documents are uploaded
237
- retriever = cl.user_session.get("retriever")
238
- if not retriever:
239
- return {"context": ""}
240
-
241
- try:
242
- # Retrieve relevant documents
243
- docs = retriever.invoke(query)
244
- if not docs:
245
- return {"context": ""}
246
-
247
- # Extract text from documents
248
- context = "\n\n".join([f"Document excerpt: {doc.page_content}" for doc in docs])
249
- return {"context": context}
250
- except Exception as e:
251
- print(f"Error retrieving from documents: {str(e)}")
252
- return {"context": ""}
253
-
254
-
255
- def document_search_tool(query: str) -> str:
256
- """
257
- Tool function to search within uploaded documents.
258
-
259
- Args:
260
- query: Search query string
261
-
262
- Returns:
263
- Information retrieved from the documents
264
- """
265
- retriever = cl.user_session.get("retriever")
266
- if not retriever:
267
- return "No documents have been uploaded yet. Please upload a document first."
268
-
269
- docs = retriever.invoke(query)
270
- if not docs:
271
- return "No relevant information found in the uploaded documents."
272
-
273
- # Format the results
274
- results = []
275
- for i, doc in enumerate(docs):
276
- results.append(f"[Document {i+1}] {doc.page_content}")
277
-
278
- return "\n\n".join(results)
279
-
280
 
281
  @cl.on_chat_start
282
  async def on_chat_start():
@@ -289,8 +29,11 @@ async def on_chat_start():
289
  content="Welcome to the Research Agent! I can help you research topics using web search, arXiv papers, and documents you upload."
290
  ).send()
291
 
 
 
 
292
  # Create the agent
293
- agent = build_agent_chain()
294
 
295
  # Store agent in user session
296
  cl.user_session.set("agent", agent)
@@ -328,7 +71,7 @@ async def main(message):
328
  with cl.Step(name="Research Process", type="tool") as step:
329
  # Run the agent executor with callbacks to stream the response
330
  result = await agent_executor.ainvoke(
331
- {"question" : message.content},
332
  config={
333
  "callbacks": [cl.AsyncLangchainCallbackHandler()],
334
  "configurable": {"session_id": message.id} # Add session_id from message
@@ -348,10 +91,9 @@ async def main(message):
348
  ).send()
349
 
350
  # Get the final answer
351
- final_answer = parse_output(result) #result["messages"][-1].content
352
 
353
- # Fix: Replace cl.make_async_gen with proper token streaming in Chainlit 2.0.4
354
- # Instead of using make_async_gen, we'll manually stream tokens from the final_answer
355
  await msg.stream_token(final_answer)
356
  await msg.send()
357
 
@@ -407,8 +149,11 @@ async def process_uploaded_file(file: cl.File, msg: cl.Message):
407
  # Store the retriever in the user session
408
  cl.user_session.set("retriever", retriever)
409
 
410
- # Rebuild the agent chain with updated tools
411
- agent = build_agent_chain()
 
 
 
412
  cl.user_session.set("agent", agent)
413
 
414
  # Let the user know that the file is processed
 
7
  from langchain_qdrant import QdrantVectorStore
8
  from qdrant_client import QdrantClient
9
  from qdrant_client.models import Distance, VectorParams
10
+ from langchain_core.tools import Tool
11
+ from typing import Dict, Any, List, Optional
12
+ from langchain_core.documents import Document
 
 
 
13
 
14
  from utils.file_processor import process_file
15
  from models.rag import LangChainRAG
16
+ from models.research_tools import RAGQueryInput
17
+ from models.search_tools import create_search_tools
18
+ from models.agent import build_agent_chain, parse_output
19
  import config
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  @cl.on_chat_start
22
  async def on_chat_start():
 
29
  content="Welcome to the Research Agent! I can help you research topics using web search, arXiv papers, and documents you upload."
30
  ).send()
31
 
32
+ # Create search tools
33
+ tools = create_search_tools(max_results=config.MAX_TAVILY_SEARCH_RESULTS)
34
+
35
  # Create the agent
36
+ agent = build_agent_chain(tools)
37
 
38
  # Store agent in user session
39
  cl.user_session.set("agent", agent)
 
71
  with cl.Step(name="Research Process", type="tool") as step:
72
  # Run the agent executor with callbacks to stream the response
73
  result = await agent_executor.ainvoke(
74
+ {"question": message.content},
75
  config={
76
  "callbacks": [cl.AsyncLangchainCallbackHandler()],
77
  "configurable": {"session_id": message.id} # Add session_id from message
 
91
  ).send()
92
 
93
  # Get the final answer
94
+ final_answer = parse_output(result)
95
 
96
+ # Stream tokens from the final_answer
 
97
  await msg.stream_token(final_answer)
98
  await msg.send()
99
 
 
149
  # Store the retriever in the user session
150
  cl.user_session.set("retriever", retriever)
151
 
152
+ # Get the search tools
153
+ tools = create_search_tools(max_results=config.MAX_TAVILY_SEARCH_RESULTS)
154
+
155
+ # Rebuild the agent with the retriever
156
+ agent = build_agent_chain(tools, retriever)
157
  cl.user_session.set("agent", agent)
158
 
159
  # Let the user know that the file is processed
models/agent.py ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ LangGraph Agent implementation for the Research Agent.
3
+ """
4
+ from typing import TypedDict, Annotated, Dict, Any, Literal, Union, cast, List, Optional
5
+
6
+ from langchain_openai import ChatOpenAI
7
+ from langchain_core.tools import Tool
8
+ from langchain_core.messages import BaseMessage, SystemMessage, HumanMessage
9
+ from langchain_core.documents import Document
10
+
11
+ from langgraph.graph.message import add_messages
12
+ from langgraph.graph import StateGraph, END
13
+ from langgraph.prebuilt import ToolNode
14
+ from models.research_tools import RAGQueryInput
15
+
16
+ # Define END as a string constant since we can't use it directly in type annotations
17
+ END_STATE = "end"
18
+
19
class ResearchAgentState(TypedDict):
    """
    State definition for the Research Agent using LangGraph.

    Attributes:
        messages: List of messages in the conversation. The `add_messages`
            annotation is a LangGraph reducer: node outputs are appended to
            the history rather than overwriting it.
        context: Additional context information from RAG retrievals
            (empty string when no documents have been uploaded).
        documents: Optional list of Document objects from uploaded files.
    """
    # Reducer-annotated channel: each node's returned messages are merged in.
    messages: Annotated[list[BaseMessage], add_messages]
    # Text blob of retrieved document excerpts, filled by the retrieve node.
    context: str
    # Raw uploaded documents, if any were provided.
    documents: Optional[List[Document]]
31
+
32
+
33
def call_model(model, state: Dict[str, Any]) -> Dict[str, list[BaseMessage]]:
    """
    Process the current state through the language model.

    Args:
        model: Language model with tools bound
        state: Current state containing messages and context

    Returns:
        Updated state with model's response added to messages
    """
    try:
        history = state["messages"]
        doc_context = state.get("context", "")

        if doc_context:
            # Surface retrieved document excerpts as a system message placed
            # right after the most recent user turn.
            note = SystemMessage(content=f"Use the following information from uploaded documents to enhance your response if relevant:\n\n{doc_context}")

            # Locate the last human message, scanning from the end.
            last_user_idx = None
            for idx in range(len(history) - 1, -1, -1):
                if isinstance(history[idx], HumanMessage):
                    last_user_idx = idx
                    break

            if last_user_idx is None:
                # No user message found — append the context at the end.
                prompt = history + [note]
            else:
                prompt = history[:last_user_idx + 1] + [note] + history[last_user_idx + 1:]
        else:
            prompt = history

        # Invoke the model on the (possibly context-augmented) transcript.
        reply = model.invoke(prompt)
        return {"messages": [reply]}
    except Exception as exc:
        # Degrade gracefully: log and return the error text as a message.
        error_msg = f"Error calling model: {str(exc)}"
        print(error_msg)  # Log the error
        return {"messages": [HumanMessage(content=error_msg)]}
74
+
75
+
76
def should_continue(state: Dict[str, Any]) -> Union[Literal["action"], Literal["end"]]:
    """
    Determine if the agent should continue processing or end.

    Args:
        state: Current state containing messages and context

    Returns:
        "action" if the last message carries tool calls, otherwise "end"
    """
    last_message = state["messages"][-1]

    # Use getattr: only AI messages expose `tool_calls`. The error fallback
    # in call_model returns a HumanMessage, which has no such attribute and
    # would previously raise AttributeError here.
    if getattr(last_message, "tool_calls", None):
        return "action"

    return "end"
92
+
93
+
94
def retrieve_from_documents(state: Dict[str, Any], retriever) -> Dict[str, str]:
    """
    Retrieve relevant context from uploaded documents based on the user query.

    Args:
        state: Current state containing messages and optional documents
        retriever: Document retriever to use

    Returns:
        Updated state with context from document retrieval
    """
    # Use the most recent human turn as the retrieval query.
    found = False
    query = None
    for msg in reversed(state["messages"]):
        if isinstance(msg, HumanMessage):
            query = msg.content
            found = True
            break

    if not found:
        # No user message found — nothing to search for.
        return {"context": ""}

    # Skip retrieval entirely when no documents have been uploaded.
    if not retriever:
        return {"context": ""}

    try:
        docs = retriever.invoke(query)
        if not docs:
            return {"context": ""}

        # Join excerpts into one text blob for the system-message injection.
        excerpts = [f"Document excerpt: {doc.page_content}" for doc in docs]
        return {"context": "\n\n".join(excerpts)}
    except Exception as exc:
        # Best-effort: retrieval failures degrade to "no context".
        print(f"Error retrieving from documents: {str(exc)}")
        return {"context": ""}
130
+
131
+
132
def document_search_tool(retriever, query: str) -> str:
    """
    Tool function to search within uploaded documents.

    Args:
        retriever: Document retriever to use
        query: Search query string

    Returns:
        Information retrieved from the documents
    """
    if not retriever:
        return "No documents have been uploaded yet. Please upload a document first."

    matches = retriever.invoke(query)
    if not matches:
        return "No relevant information found in the uploaded documents."

    # Number each excerpt so the model can cite individual passages.
    return "\n\n".join(
        f"[Document {idx + 1}] {doc.page_content}" for idx, doc in enumerate(matches)
    )
156
+
157
+
158
def convert_inputs(input_object: Dict[str, str]) -> Dict[str, list[BaseMessage]]:
    """
    Convert user input into the format expected by the agent.

    Args:
        input_object: Dictionary containing the user's question

    Returns:
        Formatted input state for the agent
    """
    # Wrap the raw question as the opening human turn of the conversation.
    question = input_object["question"]
    return {"messages": [HumanMessage(content=question)]}
169
+
170
+
171
def parse_output(input_state: Dict[str, Any]) -> str:
    """
    Extract the final response from the agent's state.

    Args:
        input_state: The final state of the agent

    Returns:
        The content of the last message
    """
    try:
        final_message = input_state["messages"][-1]
        return cast(str, final_message.content)
    except (IndexError, KeyError, AttributeError) as exc:
        # Fall back to a generic apology rather than propagating the error.
        error_msg = f"Error parsing output: {str(exc)}"
        print(error_msg)  # Log the error
        return "I encountered an error while processing your request."
188
+
189
+
190
def build_agent_chain(tools, retriever=None):
    """
    Constructs and returns the research agent execution chain.

    The chain consists of:
    1. An optional retrieval node that gets context from documents
    2. An agent node that processes messages
    3. A tool node that executes tools when called

    Args:
        tools: List of tools for the agent
        retriever: Optional retriever for document search

    Returns:
        Compiled agent chain ready for execution
    """
    # Build the complete tool list FIRST. Previously the model was bound to
    # `tools` before DocumentSearch was appended, so the model could never
    # call the document-search tool. Copy to avoid mutating the caller's list.
    tools = list(tools)
    if retriever:
        doc_search_tool = Tool(
            name="DocumentSearch",
            description="Search within the user's uploaded document. Use this tool when you need information from the specific document that was uploaded.",
            func=lambda query: document_search_tool(retriever, query),
            args_schema=RAGQueryInput
        )
        tools.append(doc_search_tool)

    # Create the model and bind the full tool set so it can emit tool calls.
    model = ChatOpenAI(model="gpt-4o", temperature=0)
    model = model.bind_tools(tools)

    # Create a node for tool execution
    tool_node = ToolNode(tools)

    # Initialize the graph with our state type
    uncompiled_graph = StateGraph(ResearchAgentState)

    # Close over the bound model so the node matches LangGraph's
    # single-argument node signature.
    def call_model_node(state):
        return call_model(model, state)

    # Add all nodes before wiring edges that reference them.
    uncompiled_graph.add_node("agent", call_model_node)
    uncompiled_graph.add_node("action", tool_node)

    if retriever:
        # Close over the retriever for the same reason as the model above.
        def retrieve_node(state):
            return retrieve_from_documents(state, retriever)

        uncompiled_graph.add_node("retrieve", retrieve_node)
        uncompiled_graph.set_entry_point("retrieve")
        uncompiled_graph.add_edge("retrieve", "agent")
    else:
        uncompiled_graph.set_entry_point("agent")

    # Route on should_continue: run tools when the model requested them,
    # otherwise finish. "end" maps straight to the END sentinel — the
    # previous explicit add_node("end", ...) was unreachable dead code.
    uncompiled_graph.add_conditional_edges(
        "agent",
        should_continue,
        {
            "action": "action",
            "end": END
        }
    )

    # Complete the loop: tool results flow back to the model.
    uncompiled_graph.add_edge("action", "agent")

    # Compile the graph
    compiled_graph = uncompiled_graph.compile()

    # Prepend input conversion so callers can invoke with {"question": ...}.
    agent_chain = convert_inputs | compiled_graph
    return agent_chain
models/research_tools.py CHANGED
@@ -1,148 +1,28 @@
1
  """
2
  Research tools implementation for the agent.
3
 
4
- This module implements the Tavily search, arXiv, and RAG tools
5
- that will be used by the research agent.
6
  """
7
- import os
8
- from typing import List, Dict, Any, Optional
9
- from langchain.agents import tool
10
- from langchain_core.tools import Tool
11
- from pydantic import BaseModel, Field # Updated import from pydantic directly
12
-
13
- from langchain_openai import ChatOpenAI
14
- from langchain_community.tools.tavily_search import TavilySearchResults
15
- from langchain_community.utilities.arxiv import ArxivAPIWrapper
16
 
17
- import config
18
- from models.rag import LangChainRAG
19
 
20
  class ArxivQueryInput(BaseModel):
21
  """Input for arXiv query."""
22
  query: str = Field(..., description="The search query to find papers on arXiv")
23
- max_results: int = Field(default=config.MAX_ARXIV_SEARCH_RESULTS, description="The maximum number of results to return")
24
 
25
  class RAGQueryInput(BaseModel):
26
  """Input for RAG query."""
27
  query: str = Field(..., description="The query to search in the uploaded document")
28
 
29
- def create_tavily_search_tool() -> Tool:
30
- """Create a Tavily search tool for the agent."""
31
- # Check if TAVILY_API_KEY is in environment variables
32
- if "TAVILY_API_KEY" not in os.environ:
33
- print("Warning: TAVILY_API_KEY environment variable not set. Web search functionality may be limited.")
34
-
35
- return TavilySearchResults(max_results=config.MAX_TAVILY_SEARCH_RESULTS)
36
-
37
- @tool
38
- def arxiv_search(query: str, max_results: int = config.MAX_ARXIV_SEARCH_RESULTS) -> str:
39
- """
40
- Search for papers on arXiv.
41
-
42
- Args:
43
- query: The search query string
44
- max_results: Maximum number of results to return
45
-
46
- Returns:
47
- A string summary of the search results
48
- """
49
- client = ArxivAPIWrapper(
50
- top_k_results=max_results,
51
- ARXIV_MAX_QUERY_LENGTH=300,
52
- load_max_docs=max_results,
53
- load_all_available_meta=True
54
- )
55
-
56
- try:
57
- results = client.run(query)
58
- if not results:
59
- return "No papers found on arXiv for this query."
60
-
61
- formatted_results = []
62
- for idx, result in enumerate(results.split("\n\n")):
63
- if result.strip():
64
- formatted_results.append(f"[{idx+1}] {result.strip()}")
65
-
66
- return "\n\n".join(formatted_results)
67
- except Exception as e:
68
- return f"Error searching arXiv: {str(e)}"
69
 
70
- class ResearchToolkit:
71
- """
72
- A toolkit of research tools for the agent.
73
- """
74
- def __init__(self, rag_chain: Optional[LangChainRAG] = None):
75
- """
76
- Initialize the research toolkit.
77
-
78
- Args:
79
- rag_chain: Optional RAG chain instance
80
- """
81
- self.rag_chain = rag_chain
82
- self.tools = self._create_tools()
83
-
84
- def _create_tools(self) -> List[Tool]:
85
- """
86
- Create the tools for the agent.
87
-
88
- Returns:
89
- List of tools
90
- """
91
- tools = [
92
- create_tavily_search_tool(),
93
- Tool(
94
- name="ArxivSearch",
95
- description="Search for scientific papers on arXiv. Use this tool when you need academic or scientific information.",
96
- func=arxiv_search,
97
- args_schema=ArxivQueryInput
98
- )
99
- ]
100
-
101
- # Add RAG tool if available
102
- if self.rag_chain:
103
- @tool
104
- def document_rag_search(query: str) -> str:
105
- """
106
- Search the uploaded document using RAG.
107
-
108
- Args:
109
- query: The search query string
110
-
111
- Returns:
112
- The response from the RAG model
113
- """
114
- docs = self.rag_chain.retriever.invoke(query)
115
- context = "\n\n".join([doc.page_content for doc in docs])
116
- response = self.rag_chain.chain.invoke(query)
117
-
118
- return f"Based on the uploaded document: {response}"
119
-
120
- tools.append(
121
- Tool(
122
- name="DocumentSearch",
123
- description="Search within the user's uploaded document. Use this tool when you need information from the specific document that was uploaded.",
124
- func=document_rag_search,
125
- args_schema=RAGQueryInput
126
- )
127
- )
128
-
129
- return tools
130
-
131
- def get_tools(self) -> List[Tool]:
132
- """
133
- Get the list of tools.
134
-
135
- Returns:
136
- List of tools
137
- """
138
- return self.tools
139
-
140
- def set_rag_chain(self, rag_chain: LangChainRAG):
141
- """
142
- Update the RAG chain and rebuild tools.
143
-
144
- Args:
145
- rag_chain: New RAG chain instance
146
- """
147
- self.rag_chain = rag_chain
148
- self.tools = self._create_tools()
 
1
  """
2
  Research tools implementation for the agent.
3
 
4
+ This module implements input schemas and tools specifically for research purposes.
 
5
  """
6
+ from typing import List, Optional
7
+ from pydantic import BaseModel, Field
 
 
 
 
 
 
 
8
 
9
+ from langchain_core.tools import Tool
 
10
 
11
class ArxivQueryInput(BaseModel):
    """Input schema for the arXiv search tool."""
    # Free-text query forwarded to the arXiv API.
    query: str = Field(..., description="The search query to find papers on arXiv")
    # Caps how many papers the tool returns.
    max_results: int = Field(default=5, description="The maximum number of results to return")
15
 
16
class RAGQueryInput(BaseModel):
    """Input schema for querying the uploaded document via RAG."""
    # Question to run against the uploaded document's retriever.
    query: str = Field(..., description="The query to search in the uploaded document")
19
 
20
class WebSearchInput(BaseModel):
    """Input schema for the web search tools."""
    # Free-text web search query.
    query: str = Field(..., description="The search query for web search")
    # Caps how many search hits are returned.
    max_results: int = Field(default=5, description="The maximum number of results to return")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
class DocumentAnalysisInput(BaseModel):
    """Input schema for focused analysis of an uploaded document."""
    # Specific question the analysis should answer.
    query: str = Field(..., description="The specific question to analyze in the document")
    # When True, the answer should cite document passages.
    include_citations: bool = Field(default=True, description="Whether to include citations in the response")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/search_tools.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Search tools module containing different search implementations.
3
+ """
4
+ from langchain_community.tools.tavily_search import TavilySearchResults
5
+ from langchain_community.tools.arxiv.tool import ArxivQueryRun
6
+ from langchain_community.tools import DuckDuckGoSearchResults
7
+ from langchain_core.tools import Tool
8
+
9
def create_search_tools(max_results=5):
    """
    Create search tools for the research agent.

    Args:
        max_results: Maximum number of results to return

    Returns:
        List of search tools for the agent
    """
    # Two web-search providers (Tavily, DuckDuckGo) plus academic search
    # via arXiv; arXiv manages its own result limits.
    return [
        TavilySearchResults(max_results=max_results),
        DuckDuckGoSearchResults(max_results=max_results),
        ArxivQueryRun(),
    ]
pyproject.toml CHANGED
@@ -8,6 +8,7 @@ dependencies = [
8
  "arxiv>=2.2.0",
9
  "chainlit==2.0.4",
10
  "duckduckgo-search>=8.0.1",
 
11
  "langchain>=0.3.23",
12
  "langchain-community>=0.3.21",
13
  "langchain-core>=0.3.54",
@@ -16,6 +17,8 @@ dependencies = [
16
  "langchain-qdrant>=0.2.0",
17
  "langchain-text-splitters>=0.3.8",
18
  "langgraph>=0.3.31",
 
 
19
  "numpy==2.2.2",
20
  "openai==1.59.9",
21
  "pydantic==2.10.1",
 
8
  "arxiv>=2.2.0",
9
  "chainlit==2.0.4",
10
  "duckduckgo-search>=8.0.1",
11
+ "feedparser>=6.0.11",
12
  "langchain>=0.3.23",
13
  "langchain-community>=0.3.21",
14
  "langchain-core>=0.3.54",
 
17
  "langchain-qdrant>=0.2.0",
18
  "langchain-text-splitters>=0.3.8",
19
  "langgraph>=0.3.31",
20
+ "listparser>=0.20",
21
+ "newspaper3k>=0.2.8",
22
  "numpy==2.2.2",
23
  "openai==1.59.9",
24
  "pydantic==2.10.1",
utils/file_processor.py CHANGED
@@ -4,24 +4,65 @@ Utilities for processing uploaded files.
4
  import os
5
  import tempfile
6
  import shutil
7
- from typing import List
 
8
 
9
  from langchain_text_splitters import RecursiveCharacterTextSplitter
10
- from langchain_community.document_loaders import PyPDFLoader, TextLoader
 
 
 
 
 
 
 
11
  from chainlit.types import AskFileResponse
12
 
13
  import config
14
 
15
- # Initialize text splitter
16
- text_splitter = RecursiveCharacterTextSplitter(
17
- chunk_size=config.CHUNK_SIZE,
18
- chunk_overlap=config.CHUNK_OVERLAP,
19
- length_function=len,
20
- is_separator_regex=False,
21
- separators=config.SEPARATORS
22
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- def process_file(file: AskFileResponse):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  """
26
  Process an uploaded file and split it into text chunks.
27
 
@@ -29,28 +70,34 @@ def process_file(file: AskFileResponse):
29
  file: The uploaded file response from Chainlit
30
 
31
  Returns:
32
- List of document chunks
33
  """
34
  print(f"Processing file: {file.name}")
35
 
36
  # Create a temporary file with the correct extension
37
  suffix = f".{file.name.split('.')[-1]}"
38
  with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
39
- # Copy the uploaded file content to the temporary file
40
- shutil.copyfile(file.path, temp_file.name)
41
- print(f"Created temporary file at: {temp_file.name}")
42
-
43
  try:
44
- # Create appropriate loader
45
- if file.name.lower().endswith('.pdf'):
46
- loader = PyPDFLoader(temp_file.name)
47
- else:
48
- loader = TextLoader(temp_file.name)
 
49
 
50
- # Load and process the documents
51
  documents = loader.load()
 
 
 
 
 
52
  texts = text_splitter.split_documents(documents)
 
53
  return texts
 
 
 
54
  finally:
55
  # Clean up the temporary file
56
  try:
 
4
  import os
5
  import tempfile
6
  import shutil
7
+ from typing import List, Optional
8
+ from pathlib import Path
9
 
10
  from langchain_text_splitters import RecursiveCharacterTextSplitter
11
+ from langchain_community.document_loaders import (
12
+ PyPDFLoader,
13
+ TextLoader,
14
+ CSVLoader,
15
+ UnstructuredExcelLoader,
16
+ Docx2txtLoader
17
+ )
18
+ from langchain_core.documents import Document
19
  from chainlit.types import AskFileResponse
20
 
21
  import config
22
 
23
def get_document_loader(file_path: str):
    """
    Get the appropriate document loader based on file extension.

    Args:
        file_path: Path to the file on disk.

    Returns:
        Document loader instance for the file type. Unrecognized
        extensions fall back to TextLoader, matching the original
        default behavior.
    """
    # Dispatch table instead of a chained ==/elif ladder: one place to
    # see (and extend) the supported extensions.
    loader_by_extension = {
        '.pdf': PyPDFLoader,
        '.txt': TextLoader,
        '.md': TextLoader,
        '.py': TextLoader,
        '.csv': CSVLoader,
        '.xlsx': UnstructuredExcelLoader,
        '.xls': UnstructuredExcelLoader,
        '.docx': Docx2txtLoader,
        '.doc': Docx2txtLoader,
    }

    # suffix includes the leading dot; lower() makes matching case-insensitive.
    file_extension = Path(file_path).suffix.lower()

    # Default to TextLoader for anything we do not explicitly recognize.
    loader_cls = loader_by_extension.get(file_extension, TextLoader)
    return loader_cls(file_path)
49
 
50
def create_text_splitter():
    """
    Create a text splitter using the project-wide configuration.

    Returns:
        A RecursiveCharacterTextSplitter configured with the chunk size,
        overlap, and separators defined in the config module.
    """
    # Collect the configured options in one place, then unpack them into
    # the splitter constructor.
    splitter_options = {
        "chunk_size": config.CHUNK_SIZE,
        "chunk_overlap": config.CHUNK_OVERLAP,
        "length_function": len,
        "is_separator_regex": False,
        "separators": config.SEPARATORS,
    }
    return RecursiveCharacterTextSplitter(**splitter_options)
64
+
65
+ def process_file(file: AskFileResponse) -> Optional[List[Document]]:
66
  """
67
  Process an uploaded file and split it into text chunks.
68
 
 
70
  file: The uploaded file response from Chainlit
71
 
72
  Returns:
73
+ List of document chunks or None if processing fails
74
  """
75
  print(f"Processing file: {file.name}")
76
 
77
  # Create a temporary file with the correct extension
78
  suffix = f".{file.name.split('.')[-1]}"
79
  with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
 
 
 
 
80
  try:
81
+ # Copy the uploaded file content to the temporary file
82
+ shutil.copyfile(file.path, temp_file.name)
83
+ print(f"Created temporary file at: {temp_file.name}")
84
+
85
+ # Get the appropriate loader
86
+ loader = get_document_loader(temp_file.name)
87
 
88
+ # Load documents
89
  documents = loader.load()
90
+
91
+ # Initialize text splitter
92
+ text_splitter = create_text_splitter()
93
+
94
+ # Split documents into chunks
95
  texts = text_splitter.split_documents(documents)
96
+
97
  return texts
98
+ except Exception as e:
99
+ print(f"Error processing file: {e}")
100
+ return None
101
  finally:
102
  # Clean up the temporary file
103
  try:
uv.lock CHANGED
@@ -10,6 +10,7 @@ dependencies = [
10
  { name = "arxiv" },
11
  { name = "chainlit" },
12
  { name = "duckduckgo-search" },
 
13
  { name = "langchain" },
14
  { name = "langchain-community" },
15
  { name = "langchain-core" },
@@ -18,6 +19,8 @@ dependencies = [
18
  { name = "langchain-qdrant" },
19
  { name = "langchain-text-splitters" },
20
  { name = "langgraph" },
 
 
21
  { name = "numpy" },
22
  { name = "openai" },
23
  { name = "pydantic" },
@@ -33,6 +36,7 @@ requires-dist = [
33
  { name = "arxiv", specifier = ">=2.2.0" },
34
  { name = "chainlit", specifier = "==2.0.4" },
35
  { name = "duckduckgo-search", specifier = ">=8.0.1" },
 
36
  { name = "langchain", specifier = ">=0.3.23" },
37
  { name = "langchain-community", specifier = ">=0.3.21" },
38
  { name = "langchain-core", specifier = ">=0.3.54" },
@@ -41,6 +45,8 @@ requires-dist = [
41
  { name = "langchain-qdrant", specifier = ">=0.2.0" },
42
  { name = "langchain-text-splitters", specifier = ">=0.3.8" },
43
  { name = "langgraph", specifier = ">=0.3.31" },
 
 
44
  { name = "numpy", specifier = "==2.2.2" },
45
  { name = "openai", specifier = "==1.59.9" },
46
  { name = "pydantic", specifier = "==2.10.1" },
@@ -170,6 +176,19 @@ wheels = [
170
  { url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815 },
171
  ]
172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  [[package]]
174
  name = "bidict"
175
  version = "0.23.1"
@@ -295,6 +314,15 @@ wheels = [
295
  { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 },
296
  ]
297
 
 
 
 
 
 
 
 
 
 
298
  [[package]]
299
  name = "dataclasses-json"
300
  version = "0.6.7"
@@ -357,6 +385,17 @@ wheels = [
357
  { url = "https://files.pythonhosted.org/packages/50/b3/b51f09c2ba432a576fe63758bddc81f78f0c6309d9e5c10d194313bf021e/fastapi-0.115.12-py3-none-any.whl", hash = "sha256:e94613d6c05e27be7ffebdd6ea5f388112e5e430c8f7d6494a9d1d88d43e814d", size = 95164 },
358
  ]
359
 
 
 
 
 
 
 
 
 
 
 
 
360
  [[package]]
361
  name = "feedparser"
362
  version = "6.0.11"
@@ -369,6 +408,15 @@ wheels = [
369
  { url = "https://files.pythonhosted.org/packages/7c/d4/8c31aad9cc18f451c49f7f9cfb5799dadffc88177f7917bc90a66459b1d7/feedparser-6.0.11-py3-none-any.whl", hash = "sha256:0be7ee7b395572b19ebeb1d6aafb0028dee11169f1c934e0ed67d54992f4ad45", size = 81343 },
370
  ]
371
 
 
 
 
 
 
 
 
 
 
372
  [[package]]
373
  name = "filetype"
374
  version = "1.2.0"
@@ -602,6 +650,12 @@ wheels = [
602
  { url = "https://files.pythonhosted.org/packages/79/9d/0fb148dc4d6fa4a7dd1d8378168d9b4cd8d4560a6fbf6f0121c5fc34eb68/importlib_metadata-8.6.1-py3-none-any.whl", hash = "sha256:02a89390c1e15fdfdc0d7c6b25cb3e62650d0494005c97d6f148bf5b9787525e", size = 26971 },
603
  ]
604
 
 
 
 
 
 
 
605
  [[package]]
606
  name = "jiter"
607
  version = "0.9.0"
@@ -625,6 +679,15 @@ wheels = [
625
  { url = "https://files.pythonhosted.org/packages/ee/47/3729f00f35a696e68da15d64eb9283c330e776f3b5789bac7f2c0c4df209/jiter-0.9.0-cp313-cp313t-win_amd64.whl", hash = "sha256:6f7838bc467ab7e8ef9f387bd6de195c43bad82a569c1699cb822f6609dd4cdf", size = 206867 },
626
  ]
627
 
 
 
 
 
 
 
 
 
 
628
  [[package]]
629
  name = "jsonpatch"
630
  version = "1.33"
@@ -840,6 +903,15 @@ wheels = [
840
  { url = "https://files.pythonhosted.org/packages/03/a5/866b44697cee47d1cae429ed370281d937ad4439f71af82a6baaa139d26a/Lazify-0.4.0-py2.py3-none-any.whl", hash = "sha256:c2c17a7a33e9406897e3f66fde4cd3f84716218d580330e5af10cfe5a0cd195a", size = 3107 },
841
  ]
842
 
 
 
 
 
 
 
 
 
 
843
  [[package]]
844
  name = "literalai"
845
  version = "0.1.103"
@@ -950,6 +1022,45 @@ wheels = [
950
  { url = "https://files.pythonhosted.org/packages/a0/c4/c2971a3ba4c6103a3d10c4b0f24f461ddc027f0f09763220cf35ca1401b3/nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c", size = 5195 },
951
  ]
952
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
953
  [[package]]
954
  name = "numpy"
955
  version = "2.2.2"
@@ -1172,6 +1283,36 @@ wheels = [
1172
  { url = "https://files.pythonhosted.org/packages/ec/1a/610693ac4ee14fcdf2d9bf3c493370e4f2ef7ae2e19217d7a237ff42367d/packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7", size = 53011 },
1173
  ]
1174
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1175
  [[package]]
1176
  name = "portalocker"
1177
  version = "2.10.1"
@@ -1344,6 +1485,18 @@ wheels = [
1344
  { url = "https://files.pythonhosted.org/packages/8e/5e/c86a5643653825d3c913719e788e41386bee415c2b87b4f955432f2de6b2/pypdf2-3.0.1-py3-none-any.whl", hash = "sha256:d16e4205cfee272fbdc0568b68d82be796540b1537508cef59388f839c191928", size = 232572 },
1345
  ]
1346
 
 
 
 
 
 
 
 
 
 
 
 
 
1347
  [[package]]
1348
  name = "python-dotenv"
1349
  version = "1.1.0"
@@ -1470,6 +1623,18 @@ wheels = [
1470
  { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 },
1471
  ]
1472
 
 
 
 
 
 
 
 
 
 
 
 
 
1473
  [[package]]
1474
  name = "requests-toolbelt"
1475
  version = "1.0.0"
@@ -1509,6 +1674,15 @@ wheels = [
1509
  { url = "https://files.pythonhosted.org/packages/52/59/0782e51887ac6b07ffd1570e0364cf901ebc36345fea669969d2084baebb/simple_websocket-1.1.0-py3-none-any.whl", hash = "sha256:4af6069630a38ed6c561010f0e11a5bc0d4ca569b36306eb257cd9a192497c8c", size = 13842 },
1510
  ]
1511
 
 
 
 
 
 
 
 
 
 
1512
  [[package]]
1513
  name = "sniffio"
1514
  version = "1.3.1"
@@ -1518,6 +1692,15 @@ wheels = [
1518
  { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 },
1519
  ]
1520
 
 
 
 
 
 
 
 
 
 
1521
  [[package]]
1522
  name = "sqlalchemy"
1523
  version = "2.0.40"
@@ -1598,6 +1781,27 @@ wheels = [
1598
  { url = "https://files.pythonhosted.org/packages/de/a8/8f499c179ec900783ffe133e9aab10044481679bb9aad78436d239eee716/tiktoken-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:5ea0edb6f83dc56d794723286215918c1cde03712cbbafa0348b33448faf5b95", size = 894669 },
1599
  ]
1600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1601
  [[package]]
1602
  name = "tomli"
1603
  version = "2.2.1"
 
10
  { name = "arxiv" },
11
  { name = "chainlit" },
12
  { name = "duckduckgo-search" },
13
+ { name = "feedparser" },
14
  { name = "langchain" },
15
  { name = "langchain-community" },
16
  { name = "langchain-core" },
 
19
  { name = "langchain-qdrant" },
20
  { name = "langchain-text-splitters" },
21
  { name = "langgraph" },
22
+ { name = "listparser" },
23
+ { name = "newspaper3k" },
24
  { name = "numpy" },
25
  { name = "openai" },
26
  { name = "pydantic" },
 
36
  { name = "arxiv", specifier = ">=2.2.0" },
37
  { name = "chainlit", specifier = "==2.0.4" },
38
  { name = "duckduckgo-search", specifier = ">=8.0.1" },
39
+ { name = "feedparser", specifier = ">=6.0.11" },
40
  { name = "langchain", specifier = ">=0.3.23" },
41
  { name = "langchain-community", specifier = ">=0.3.21" },
42
  { name = "langchain-core", specifier = ">=0.3.54" },
 
45
  { name = "langchain-qdrant", specifier = ">=0.2.0" },
46
  { name = "langchain-text-splitters", specifier = ">=0.3.8" },
47
  { name = "langgraph", specifier = ">=0.3.31" },
48
+ { name = "listparser", specifier = ">=0.20" },
49
+ { name = "newspaper3k", specifier = ">=0.2.8" },
50
  { name = "numpy", specifier = "==2.2.2" },
51
  { name = "openai", specifier = "==1.59.9" },
52
  { name = "pydantic", specifier = "==2.10.1" },
 
176
  { url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815 },
177
  ]
178
 
179
+ [[package]]
180
+ name = "beautifulsoup4"
181
+ version = "4.13.4"
182
+ source = { registry = "https://pypi.org/simple" }
183
+ dependencies = [
184
+ { name = "soupsieve" },
185
+ { name = "typing-extensions" },
186
+ ]
187
+ sdist = { url = "https://files.pythonhosted.org/packages/d8/e4/0c4c39e18fd76d6a628d4dd8da40543d136ce2d1752bd6eeeab0791f4d6b/beautifulsoup4-4.13.4.tar.gz", hash = "sha256:dbb3c4e1ceae6aefebdaf2423247260cd062430a410e38c66f2baa50a8437195", size = 621067 }
188
+ wheels = [
189
+ { url = "https://files.pythonhosted.org/packages/50/cd/30110dc0ffcf3b131156077b90e9f60ed75711223f306da4db08eff8403b/beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b", size = 187285 },
190
+ ]
191
+
192
  [[package]]
193
  name = "bidict"
194
  version = "0.23.1"
 
314
  { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 },
315
  ]
316
 
317
+ [[package]]
318
+ name = "cssselect"
319
+ version = "1.3.0"
320
+ source = { registry = "https://pypi.org/simple" }
321
+ sdist = { url = "https://files.pythonhosted.org/packages/72/0a/c3ea9573b1dc2e151abfe88c7fe0c26d1892fe6ed02d0cdb30f0d57029d5/cssselect-1.3.0.tar.gz", hash = "sha256:57f8a99424cfab289a1b6a816a43075a4b00948c86b4dcf3ef4ee7e15f7ab0c7", size = 42870 }
322
+ wheels = [
323
+ { url = "https://files.pythonhosted.org/packages/ee/58/257350f7db99b4ae12b614a36256d9cc870d71d9e451e79c2dc3b23d7c3c/cssselect-1.3.0-py3-none-any.whl", hash = "sha256:56d1bf3e198080cc1667e137bc51de9cadfca259f03c2d4e09037b3e01e30f0d", size = 18786 },
324
+ ]
325
+
326
  [[package]]
327
  name = "dataclasses-json"
328
  version = "0.6.7"
 
385
  { url = "https://files.pythonhosted.org/packages/50/b3/b51f09c2ba432a576fe63758bddc81f78f0c6309d9e5c10d194313bf021e/fastapi-0.115.12-py3-none-any.whl", hash = "sha256:e94613d6c05e27be7ffebdd6ea5f388112e5e430c8f7d6494a9d1d88d43e814d", size = 95164 },
386
  ]
387
 
388
+ [[package]]
389
+ name = "feedfinder2"
390
+ version = "0.0.4"
391
+ source = { registry = "https://pypi.org/simple" }
392
+ dependencies = [
393
+ { name = "beautifulsoup4" },
394
+ { name = "requests" },
395
+ { name = "six" },
396
+ ]
397
+ sdist = { url = "https://files.pythonhosted.org/packages/35/82/1251fefec3bb4b03fd966c7e7f7a41c9fc2bb00d823a34c13f847fd61406/feedfinder2-0.0.4.tar.gz", hash = "sha256:3701ee01a6c85f8b865a049c30ba0b4608858c803fe8e30d1d289fdbe89d0efe", size = 3297 }
398
+
399
  [[package]]
400
  name = "feedparser"
401
  version = "6.0.11"
 
408
  { url = "https://files.pythonhosted.org/packages/7c/d4/8c31aad9cc18f451c49f7f9cfb5799dadffc88177f7917bc90a66459b1d7/feedparser-6.0.11-py3-none-any.whl", hash = "sha256:0be7ee7b395572b19ebeb1d6aafb0028dee11169f1c934e0ed67d54992f4ad45", size = 81343 },
409
  ]
410
 
411
+ [[package]]
412
+ name = "filelock"
413
+ version = "3.18.0"
414
+ source = { registry = "https://pypi.org/simple" }
415
+ sdist = { url = "https://files.pythonhosted.org/packages/0a/10/c23352565a6544bdc5353e0b15fc1c563352101f30e24bf500207a54df9a/filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2", size = 18075 }
416
+ wheels = [
417
+ { url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215 },
418
+ ]
419
+
420
  [[package]]
421
  name = "filetype"
422
  version = "1.2.0"
 
650
  { url = "https://files.pythonhosted.org/packages/79/9d/0fb148dc4d6fa4a7dd1d8378168d9b4cd8d4560a6fbf6f0121c5fc34eb68/importlib_metadata-8.6.1-py3-none-any.whl", hash = "sha256:02a89390c1e15fdfdc0d7c6b25cb3e62650d0494005c97d6f148bf5b9787525e", size = 26971 },
651
  ]
652
 
653
+ [[package]]
654
+ name = "jieba3k"
655
+ version = "0.35.1"
656
+ source = { registry = "https://pypi.org/simple" }
657
+ sdist = { url = "https://files.pythonhosted.org/packages/a9/cb/2c8332bcdc14d33b0bedd18ae0a4981a069c3513e445120da3c3f23a8aaa/jieba3k-0.35.1.zip", hash = "sha256:980a4f2636b778d312518066be90c7697d410dd5a472385f5afced71a2db1c10", size = 7423646 }
658
+
659
  [[package]]
660
  name = "jiter"
661
  version = "0.9.0"
 
679
  { url = "https://files.pythonhosted.org/packages/ee/47/3729f00f35a696e68da15d64eb9283c330e776f3b5789bac7f2c0c4df209/jiter-0.9.0-cp313-cp313t-win_amd64.whl", hash = "sha256:6f7838bc467ab7e8ef9f387bd6de195c43bad82a569c1699cb822f6609dd4cdf", size = 206867 },
680
  ]
681
 
682
+ [[package]]
683
+ name = "joblib"
684
+ version = "1.4.2"
685
+ source = { registry = "https://pypi.org/simple" }
686
+ sdist = { url = "https://files.pythonhosted.org/packages/64/33/60135848598c076ce4b231e1b1895170f45fbcaeaa2c9d5e38b04db70c35/joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e", size = 2116621 }
687
+ wheels = [
688
+ { url = "https://files.pythonhosted.org/packages/91/29/df4b9b42f2be0b623cbd5e2140cafcaa2bef0759a00b7b70104dcfe2fb51/joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6", size = 301817 },
689
+ ]
690
+
691
  [[package]]
692
  name = "jsonpatch"
693
  version = "1.33"
 
903
  { url = "https://files.pythonhosted.org/packages/03/a5/866b44697cee47d1cae429ed370281d937ad4439f71af82a6baaa139d26a/Lazify-0.4.0-py2.py3-none-any.whl", hash = "sha256:c2c17a7a33e9406897e3f66fde4cd3f84716218d580330e5af10cfe5a0cd195a", size = 3107 },
904
  ]
905
 
906
+ [[package]]
907
+ name = "listparser"
908
+ version = "0.20"
909
+ source = { registry = "https://pypi.org/simple" }
910
+ sdist = { url = "https://files.pythonhosted.org/packages/be/ee/d9f02600955ca34baf73e824d64b181b412745ed448a0ad1a92cef81115b/listparser-0.20.tar.gz", hash = "sha256:0dda5b41ca9531fc3c438eb4abf4d8a7cf03ef050d196875993e897a66c1f885", size = 12404 }
911
+ wheels = [
912
+ { url = "https://files.pythonhosted.org/packages/c9/27/bd96818acce8ed1909dff29817096016f5e958ef646a377b34d55afa23b3/listparser-0.20-py3-none-any.whl", hash = "sha256:5daae9895b75191a77b14f5b8eabf7a63a4ca440f215d9bd8d8e5a2eccde02ce", size = 14149 },
913
+ ]
914
+
915
  [[package]]
916
  name = "literalai"
917
  version = "0.1.103"
 
1022
  { url = "https://files.pythonhosted.org/packages/a0/c4/c2971a3ba4c6103a3d10c4b0f24f461ddc027f0f09763220cf35ca1401b3/nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c", size = 5195 },
1023
  ]
1024
 
1025
+ [[package]]
1026
+ name = "newspaper3k"
1027
+ version = "0.2.8"
1028
+ source = { registry = "https://pypi.org/simple" }
1029
+ dependencies = [
1030
+ { name = "beautifulsoup4" },
1031
+ { name = "cssselect" },
1032
+ { name = "feedfinder2" },
1033
+ { name = "feedparser" },
1034
+ { name = "jieba3k" },
1035
+ { name = "lxml" },
1036
+ { name = "nltk" },
1037
+ { name = "pillow" },
1038
+ { name = "python-dateutil" },
1039
+ { name = "pyyaml" },
1040
+ { name = "requests" },
1041
+ { name = "tinysegmenter" },
1042
+ { name = "tldextract" },
1043
+ ]
1044
+ sdist = { url = "https://files.pythonhosted.org/packages/ce/fb/8f8525be0cafa48926e85b0c06a7cb3e2a892d340b8036f8c8b1b572df1c/newspaper3k-0.2.8.tar.gz", hash = "sha256:9f1bd3e1fb48f400c715abf875cc7b0a67b7ddcd87f50c9aeeb8fcbbbd9004fb", size = 205685 }
1045
+ wheels = [
1046
+ { url = "https://files.pythonhosted.org/packages/d7/b9/51afecb35bb61b188a4b44868001de348a0e8134b4dfa00ffc191567c4b9/newspaper3k-0.2.8-py3-none-any.whl", hash = "sha256:44a864222633d3081113d1030615991c3dbba87239f6bbf59d91240f71a22e3e", size = 211132 },
1047
+ ]
1048
+
1049
+ [[package]]
1050
+ name = "nltk"
1051
+ version = "3.9.1"
1052
+ source = { registry = "https://pypi.org/simple" }
1053
+ dependencies = [
1054
+ { name = "click" },
1055
+ { name = "joblib" },
1056
+ { name = "regex" },
1057
+ { name = "tqdm" },
1058
+ ]
1059
+ sdist = { url = "https://files.pythonhosted.org/packages/3c/87/db8be88ad32c2d042420b6fd9ffd4a149f9a0d7f0e86b3f543be2eeeedd2/nltk-3.9.1.tar.gz", hash = "sha256:87d127bd3de4bd89a4f81265e5fa59cb1b199b27440175370f7417d2bc7ae868", size = 2904691 }
1060
+ wheels = [
1061
+ { url = "https://files.pythonhosted.org/packages/4d/66/7d9e26593edda06e8cb531874633f7c2372279c3b0f46235539fe546df8b/nltk-3.9.1-py3-none-any.whl", hash = "sha256:4fa26829c5b00715afe3061398a8989dc643b92ce7dd93fb4585a70930d168a1", size = 1505442 },
1062
+ ]
1063
+
1064
  [[package]]
1065
  name = "numpy"
1066
  version = "2.2.2"
 
1283
  { url = "https://files.pythonhosted.org/packages/ec/1a/610693ac4ee14fcdf2d9bf3c493370e4f2ef7ae2e19217d7a237ff42367d/packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7", size = 53011 },
1284
  ]
1285
 
1286
+ [[package]]
1287
+ name = "pillow"
1288
+ version = "11.2.1"
1289
+ source = { registry = "https://pypi.org/simple" }
1290
+ sdist = { url = "https://files.pythonhosted.org/packages/af/cb/bb5c01fcd2a69335b86c22142b2bccfc3464087efb7fd382eee5ffc7fdf7/pillow-11.2.1.tar.gz", hash = "sha256:a64dd61998416367b7ef979b73d3a85853ba9bec4c2925f74e588879a58716b6", size = 47026707 }
1291
+ wheels = [
1292
+ { url = "https://files.pythonhosted.org/packages/36/9c/447528ee3776e7ab8897fe33697a7ff3f0475bb490c5ac1456a03dc57956/pillow-11.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:fdec757fea0b793056419bca3e9932eb2b0ceec90ef4813ea4c1e072c389eb28", size = 3190098 },
1293
+ { url = "https://files.pythonhosted.org/packages/b5/09/29d5cd052f7566a63e5b506fac9c60526e9ecc553825551333e1e18a4858/pillow-11.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b0e130705d568e2f43a17bcbe74d90958e8a16263868a12c3e0d9c8162690830", size = 3030166 },
1294
+ { url = "https://files.pythonhosted.org/packages/71/5d/446ee132ad35e7600652133f9c2840b4799bbd8e4adba881284860da0a36/pillow-11.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bdb5e09068332578214cadd9c05e3d64d99e0e87591be22a324bdbc18925be0", size = 4408674 },
1295
+ { url = "https://files.pythonhosted.org/packages/69/5f/cbe509c0ddf91cc3a03bbacf40e5c2339c4912d16458fcb797bb47bcb269/pillow-11.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d189ba1bebfbc0c0e529159631ec72bb9e9bc041f01ec6d3233d6d82eb823bc1", size = 4496005 },
1296
+ { url = "https://files.pythonhosted.org/packages/f9/b3/dd4338d8fb8a5f312021f2977fb8198a1184893f9b00b02b75d565c33b51/pillow-11.2.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:191955c55d8a712fab8934a42bfefbf99dd0b5875078240943f913bb66d46d9f", size = 4518707 },
1297
+ { url = "https://files.pythonhosted.org/packages/13/eb/2552ecebc0b887f539111c2cd241f538b8ff5891b8903dfe672e997529be/pillow-11.2.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:ad275964d52e2243430472fc5d2c2334b4fc3ff9c16cb0a19254e25efa03a155", size = 4610008 },
1298
+ { url = "https://files.pythonhosted.org/packages/72/d1/924ce51bea494cb6e7959522d69d7b1c7e74f6821d84c63c3dc430cbbf3b/pillow-11.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:750f96efe0597382660d8b53e90dd1dd44568a8edb51cb7f9d5d918b80d4de14", size = 4585420 },
1299
+ { url = "https://files.pythonhosted.org/packages/43/ab/8f81312d255d713b99ca37479a4cb4b0f48195e530cdc1611990eb8fd04b/pillow-11.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fe15238d3798788d00716637b3d4e7bb6bde18b26e5d08335a96e88564a36b6b", size = 4667655 },
1300
+ { url = "https://files.pythonhosted.org/packages/94/86/8f2e9d2dc3d308dfd137a07fe1cc478df0a23d42a6c4093b087e738e4827/pillow-11.2.1-cp313-cp313-win32.whl", hash = "sha256:3fe735ced9a607fee4f481423a9c36701a39719252a9bb251679635f99d0f7d2", size = 2332329 },
1301
+ { url = "https://files.pythonhosted.org/packages/6d/ec/1179083b8d6067a613e4d595359b5fdea65d0a3b7ad623fee906e1b3c4d2/pillow-11.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:74ee3d7ecb3f3c05459ba95eed5efa28d6092d751ce9bf20e3e253a4e497e691", size = 2676388 },
1302
+ { url = "https://files.pythonhosted.org/packages/23/f1/2fc1e1e294de897df39fa8622d829b8828ddad938b0eaea256d65b84dd72/pillow-11.2.1-cp313-cp313-win_arm64.whl", hash = "sha256:5119225c622403afb4b44bad4c1ca6c1f98eed79db8d3bc6e4e160fc6339d66c", size = 2414950 },
1303
+ { url = "https://files.pythonhosted.org/packages/c4/3e/c328c48b3f0ead7bab765a84b4977acb29f101d10e4ef57a5e3400447c03/pillow-11.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8ce2e8411c7aaef53e6bb29fe98f28cd4fbd9a1d9be2eeea434331aac0536b22", size = 3192759 },
1304
+ { url = "https://files.pythonhosted.org/packages/18/0e/1c68532d833fc8b9f404d3a642991441d9058eccd5606eab31617f29b6d4/pillow-11.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:9ee66787e095127116d91dea2143db65c7bb1e232f617aa5957c0d9d2a3f23a7", size = 3033284 },
1305
+ { url = "https://files.pythonhosted.org/packages/b7/cb/6faf3fb1e7705fd2db74e070f3bf6f88693601b0ed8e81049a8266de4754/pillow-11.2.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9622e3b6c1d8b551b6e6f21873bdcc55762b4b2126633014cea1803368a9aa16", size = 4445826 },
1306
+ { url = "https://files.pythonhosted.org/packages/07/94/8be03d50b70ca47fb434a358919d6a8d6580f282bbb7af7e4aa40103461d/pillow-11.2.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63b5dff3a68f371ea06025a1a6966c9a1e1ee452fc8020c2cd0ea41b83e9037b", size = 4527329 },
1307
+ { url = "https://files.pythonhosted.org/packages/fd/a4/bfe78777076dc405e3bd2080bc32da5ab3945b5a25dc5d8acaa9de64a162/pillow-11.2.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:31df6e2d3d8fc99f993fd253e97fae451a8db2e7207acf97859732273e108406", size = 4549049 },
1308
+ { url = "https://files.pythonhosted.org/packages/65/4d/eaf9068dc687c24979e977ce5677e253624bd8b616b286f543f0c1b91662/pillow-11.2.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:062b7a42d672c45a70fa1f8b43d1d38ff76b63421cbbe7f88146b39e8a558d91", size = 4635408 },
1309
+ { url = "https://files.pythonhosted.org/packages/1d/26/0fd443365d9c63bc79feb219f97d935cd4b93af28353cba78d8e77b61719/pillow-11.2.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4eb92eca2711ef8be42fd3f67533765d9fd043b8c80db204f16c8ea62ee1a751", size = 4614863 },
1310
+ { url = "https://files.pythonhosted.org/packages/49/65/dca4d2506be482c2c6641cacdba5c602bc76d8ceb618fd37de855653a419/pillow-11.2.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f91ebf30830a48c825590aede79376cb40f110b387c17ee9bd59932c961044f9", size = 4692938 },
1311
+ { url = "https://files.pythonhosted.org/packages/b3/92/1ca0c3f09233bd7decf8f7105a1c4e3162fb9142128c74adad0fb361b7eb/pillow-11.2.1-cp313-cp313t-win32.whl", hash = "sha256:e0b55f27f584ed623221cfe995c912c61606be8513bfa0e07d2c674b4516d9dd", size = 2335774 },
1312
+ { url = "https://files.pythonhosted.org/packages/a5/ac/77525347cb43b83ae905ffe257bbe2cc6fd23acb9796639a1f56aa59d191/pillow-11.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:36d6b82164c39ce5482f649b437382c0fb2395eabc1e2b1702a6deb8ad647d6e", size = 2681895 },
1313
+ { url = "https://files.pythonhosted.org/packages/67/32/32dc030cfa91ca0fc52baebbba2e009bb001122a1daa8b6a79ad830b38d3/pillow-11.2.1-cp313-cp313t-win_arm64.whl", hash = "sha256:225c832a13326e34f212d2072982bb1adb210e0cc0b153e688743018c94a2681", size = 2417234 },
1314
+ ]
1315
+
1316
  [[package]]
1317
  name = "portalocker"
1318
  version = "2.10.1"
 
1485
  { url = "https://files.pythonhosted.org/packages/8e/5e/c86a5643653825d3c913719e788e41386bee415c2b87b4f955432f2de6b2/pypdf2-3.0.1-py3-none-any.whl", hash = "sha256:d16e4205cfee272fbdc0568b68d82be796540b1537508cef59388f839c191928", size = 232572 },
1486
  ]
1487
 
1488
+ [[package]]
1489
+ name = "python-dateutil"
1490
+ version = "2.9.0.post0"
1491
+ source = { registry = "https://pypi.org/simple" }
1492
+ dependencies = [
1493
+ { name = "six" },
1494
+ ]
1495
+ sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432 }
1496
+ wheels = [
1497
+ { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 },
1498
+ ]
1499
+
1500
  [[package]]
1501
  name = "python-dotenv"
1502
  version = "1.1.0"
 
1623
  { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 },
1624
  ]
1625
 
1626
+ [[package]]
1627
+ name = "requests-file"
1628
+ version = "2.1.0"
1629
+ source = { registry = "https://pypi.org/simple" }
1630
+ dependencies = [
1631
+ { name = "requests" },
1632
+ ]
1633
+ sdist = { url = "https://files.pythonhosted.org/packages/72/97/bf44e6c6bd8ddbb99943baf7ba8b1a8485bcd2fe0e55e5708d7fee4ff1ae/requests_file-2.1.0.tar.gz", hash = "sha256:0f549a3f3b0699415ac04d167e9cb39bccfb730cb832b4d20be3d9867356e658", size = 6891 }
1634
+ wheels = [
1635
+ { url = "https://files.pythonhosted.org/packages/d7/25/dd878a121fcfdf38f52850f11c512e13ec87c2ea72385933818e5b6c15ce/requests_file-2.1.0-py2.py3-none-any.whl", hash = "sha256:cf270de5a4c5874e84599fc5778303d496c10ae5e870bfa378818f35d21bda5c", size = 4244 },
1636
+ ]
1637
+
1638
  [[package]]
1639
  name = "requests-toolbelt"
1640
  version = "1.0.0"
 
1674
  { url = "https://files.pythonhosted.org/packages/52/59/0782e51887ac6b07ffd1570e0364cf901ebc36345fea669969d2084baebb/simple_websocket-1.1.0-py3-none-any.whl", hash = "sha256:4af6069630a38ed6c561010f0e11a5bc0d4ca569b36306eb257cd9a192497c8c", size = 13842 },
1675
  ]
1676
 
1677
+ [[package]]
1678
+ name = "six"
1679
+ version = "1.17.0"
1680
+ source = { registry = "https://pypi.org/simple" }
1681
+ sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031 }
1682
+ wheels = [
1683
+ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 },
1684
+ ]
1685
+
1686
  [[package]]
1687
  name = "sniffio"
1688
  version = "1.3.1"
 
1692
  { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 },
1693
  ]
1694
 
1695
+ [[package]]
1696
+ name = "soupsieve"
1697
+ version = "2.6"
1698
+ source = { registry = "https://pypi.org/simple" }
1699
+ sdist = { url = "https://files.pythonhosted.org/packages/d7/ce/fbaeed4f9fb8b2daa961f90591662df6a86c1abf25c548329a86920aedfb/soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb", size = 101569 }
1700
+ wheels = [
1701
+ { url = "https://files.pythonhosted.org/packages/d1/c2/fe97d779f3ef3b15f05c94a2f1e3d21732574ed441687474db9d342a7315/soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9", size = 36186 },
1702
+ ]
1703
+
1704
  [[package]]
1705
  name = "sqlalchemy"
1706
  version = "2.0.40"
 
1781
  { url = "https://files.pythonhosted.org/packages/de/a8/8f499c179ec900783ffe133e9aab10044481679bb9aad78436d239eee716/tiktoken-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:5ea0edb6f83dc56d794723286215918c1cde03712cbbafa0348b33448faf5b95", size = 894669 },
1782
  ]
1783
 
1784
+ [[package]]
1785
+ name = "tinysegmenter"
1786
+ version = "0.3"
1787
+ source = { registry = "https://pypi.org/simple" }
1788
+ sdist = { url = "https://files.pythonhosted.org/packages/17/82/86982e4b6d16e4febc79c2a1d68ee3b707e8a020c5d2bc4af8052d0f136a/tinysegmenter-0.3.tar.gz", hash = "sha256:ed1f6d2e806a4758a73be589754384cbadadc7e1a414c81a166fc9adf2d40c6d", size = 16893 }
1789
+
1790
+ [[package]]
1791
+ name = "tldextract"
1792
+ version = "5.2.0"
1793
+ source = { registry = "https://pypi.org/simple" }
1794
+ dependencies = [
1795
+ { name = "filelock" },
1796
+ { name = "idna" },
1797
+ { name = "requests" },
1798
+ { name = "requests-file" },
1799
+ ]
1800
+ sdist = { url = "https://files.pythonhosted.org/packages/20/7a/e469c4f71231a848492da31a7be6921a6cd04ecc8eed58e924bece0fb6de/tldextract-5.2.0.tar.gz", hash = "sha256:c3a8c4daf2c25a57f54d6ef6762aeac7eff5ac3da04cdb607130be757b8457ab", size = 126839 }
1801
+ wheels = [
1802
+ { url = "https://files.pythonhosted.org/packages/5e/20/b400e99827439eb91d5aa283e09d43e7e46aba66b07edf6f09404cb741da/tldextract-5.2.0-py3-none-any.whl", hash = "sha256:59509cbf99628c9440f4d19d3a1fd8488d50297ea23879c136576263c5a04eba", size = 106308 },
1803
+ ]
1804
+
1805
  [[package]]
1806
  name = "tomli"
1807
  version = "2.2.1"