Spaces:
Sleeping
Sleeping
Implement LangGraph Agent for Research with Document Retrieval and Search Tools
Browse files- Added `agent.py` to define the Research Agent's state and processing logic.
- Implemented message handling, context retrieval from documents, and model invocation.
- Created a document search tool to query uploaded documents.
- Developed a function to convert user inputs into the agent's expected format.
- Introduced a search tools module in `search_tools.py` to integrate Tavily, DuckDuckGo, and Arxiv search functionalities.
- Established a comprehensive agent chain that includes retrieval, processing, and tool execution.
- handlers/chainlit_handlers.py +18 -273
- models/agent.py +269 -0
- models/research_tools.py +13 -133
- models/search_tools.py +28 -0
- pyproject.toml +3 -0
- utils/file_processor.py +69 -22
- uv.lock +204 -0
handlers/chainlit_handlers.py
CHANGED
|
@@ -7,276 +7,16 @@ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
|
| 7 |
from langchain_qdrant import QdrantVectorStore
|
| 8 |
from qdrant_client import QdrantClient
|
| 9 |
from qdrant_client.models import Distance, VectorParams
|
| 10 |
-
from
|
| 11 |
-
from
|
| 12 |
-
|
| 13 |
-
from langchain_core.runnables.history import RunnableWithMessageHistory
|
| 14 |
-
from langchain_core.chat_history import BaseChatMessageHistory
|
| 15 |
-
from langchain_core.prompts import MessagesPlaceholder
|
| 16 |
|
| 17 |
from utils.file_processor import process_file
|
| 18 |
from models.rag import LangChainRAG
|
| 19 |
-
from models.research_tools import
|
|
|
|
|
|
|
| 20 |
import config
|
| 21 |
-
from langchain_community.tools.tavily_search import TavilySearchResults
|
| 22 |
-
from langchain_community.tools.arxiv.tool import ArxivQueryRun
|
| 23 |
-
from typing import TypedDict, Annotated, Dict, Any, Literal, Union, cast, List, Optional
|
| 24 |
-
from langgraph.graph.message import add_messages
|
| 25 |
-
import operator
|
| 26 |
-
from langchain_core.messages import BaseMessage, SystemMessage
|
| 27 |
-
from langgraph.graph import StateGraph, END
|
| 28 |
-
from langchain_core.messages import HumanMessage
|
| 29 |
-
from langchain_community.tools import DuckDuckGoSearchResults
|
| 30 |
-
from langchain_core.documents import Document
|
| 31 |
-
from langchain_core.tools import Tool
|
| 32 |
-
|
| 33 |
-
tavily_tool = TavilySearchResults(max_results=5)
|
| 34 |
-
duckduckgo_tool = DuckDuckGoSearchResults(max_results=5)
|
| 35 |
-
arxiv_tool = ArxivQueryRun()
|
| 36 |
-
|
| 37 |
-
tool_belt = [
|
| 38 |
-
tavily_tool,
|
| 39 |
-
duckduckgo_tool,
|
| 40 |
-
arxiv_tool,
|
| 41 |
-
]
|
| 42 |
-
|
| 43 |
-
model = ChatOpenAI(model="gpt-4o", temperature=0)
|
| 44 |
-
model = model.bind_tools(tool_belt)
|
| 45 |
-
|
| 46 |
-
class ResearchAgentState(TypedDict):
|
| 47 |
-
"""
|
| 48 |
-
State definition for the Research Agent using LangGraph.
|
| 49 |
-
|
| 50 |
-
Attributes:
|
| 51 |
-
messages: List of messages in the conversation
|
| 52 |
-
context: Additional context information from RAG retrievals
|
| 53 |
-
documents: Optional list of Document objects from uploaded files
|
| 54 |
-
"""
|
| 55 |
-
messages: Annotated[list[BaseMessage], add_messages]
|
| 56 |
-
context: str
|
| 57 |
-
documents: Optional[List[Document]]
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
from langgraph.prebuilt import ToolNode
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
def call_model(state: Dict[str, Any]) -> Dict[str, list[BaseMessage]]:
|
| 64 |
-
"""
|
| 65 |
-
Process the current state through the language model.
|
| 66 |
-
|
| 67 |
-
Args:
|
| 68 |
-
state: Current state containing messages and context
|
| 69 |
-
|
| 70 |
-
Returns:
|
| 71 |
-
Updated state with model's response added to messages
|
| 72 |
-
"""
|
| 73 |
-
try:
|
| 74 |
-
messages = state["messages"]
|
| 75 |
-
context = state.get("context", "")
|
| 76 |
-
|
| 77 |
-
# Add context from documents if available
|
| 78 |
-
if context:
|
| 79 |
-
# Insert system message with context before the latest user message
|
| 80 |
-
context_message = SystemMessage(content=f"Use the following information from uploaded documents to enhance your response if relevant:\n\n{context}")
|
| 81 |
-
|
| 82 |
-
# Find the position of the last user message
|
| 83 |
-
for i in range(len(messages)-1, -1, -1):
|
| 84 |
-
if isinstance(messages[i], HumanMessage):
|
| 85 |
-
# Insert context right after the last user message
|
| 86 |
-
enhanced_messages = messages[:i+1] + [context_message] + messages[i+1:]
|
| 87 |
-
break
|
| 88 |
-
else:
|
| 89 |
-
# No user message found, just append context
|
| 90 |
-
enhanced_messages = messages + [context_message]
|
| 91 |
-
else:
|
| 92 |
-
enhanced_messages = messages
|
| 93 |
-
|
| 94 |
-
# Get response from the model
|
| 95 |
-
response = model.invoke(enhanced_messages)
|
| 96 |
-
return {"messages": [response]}
|
| 97 |
-
except Exception as e:
|
| 98 |
-
# Handle exceptions gracefully
|
| 99 |
-
error_msg = f"Error calling model: {str(e)}"
|
| 100 |
-
print(error_msg) # Log the error
|
| 101 |
-
# Return a fallback response
|
| 102 |
-
return {"messages": [HumanMessage(content=error_msg)]}
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
def should_continue(state: Dict[str, Any]) -> Union[Literal["action"], Literal[END]]:
|
| 106 |
-
"""
|
| 107 |
-
Determine if the agent should continue processing or end.
|
| 108 |
-
|
| 109 |
-
Args:
|
| 110 |
-
state: Current state containing messages and context
|
| 111 |
-
|
| 112 |
-
Returns:
|
| 113 |
-
"action" if tool calls are present, otherwise END
|
| 114 |
-
"""
|
| 115 |
-
last_message = state["messages"][-1]
|
| 116 |
-
|
| 117 |
-
if last_message.tool_calls:
|
| 118 |
-
return "action"
|
| 119 |
-
|
| 120 |
-
return END
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
def convert_inputs(input_object: Dict[str, str]) -> Dict[str, list[BaseMessage]]:
|
| 124 |
-
"""
|
| 125 |
-
Convert user input into the format expected by the agent.
|
| 126 |
-
|
| 127 |
-
Args:
|
| 128 |
-
input_object: Dictionary containing the user's question
|
| 129 |
-
|
| 130 |
-
Returns:
|
| 131 |
-
Formatted input state for the agent
|
| 132 |
-
"""
|
| 133 |
-
return {"messages": [HumanMessage(content=input_object["question"])]}
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
def parse_output(input_state: Dict[str, Any]) -> str:
|
| 137 |
-
"""
|
| 138 |
-
Extract the final response from the agent's state.
|
| 139 |
-
|
| 140 |
-
Args:
|
| 141 |
-
input_state: The final state of the agent
|
| 142 |
-
|
| 143 |
-
Returns:
|
| 144 |
-
The content of the last message
|
| 145 |
-
"""
|
| 146 |
-
try:
|
| 147 |
-
return cast(str, input_state["messages"][-1].content)
|
| 148 |
-
except (IndexError, KeyError, AttributeError) as e:
|
| 149 |
-
# Handle potential errors when accessing the output
|
| 150 |
-
error_msg = f"Error parsing output: {str(e)}"
|
| 151 |
-
print(error_msg) # Log the error
|
| 152 |
-
return "I encountered an error while processing your request."
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
def build_agent_chain() -> Any:
|
| 156 |
-
"""
|
| 157 |
-
Constructs and returns the research agent execution chain.
|
| 158 |
-
|
| 159 |
-
The chain consists of:
|
| 160 |
-
1. A retrieval node that gets context from documents
|
| 161 |
-
2. An agent node that processes messages
|
| 162 |
-
3. A tool node that executes tools when called
|
| 163 |
-
|
| 164 |
-
Returns:
|
| 165 |
-
Compiled agent chain ready for execution
|
| 166 |
-
"""
|
| 167 |
-
# Create document search tool
|
| 168 |
-
doc_search_tool = Tool(
|
| 169 |
-
name="DocumentSearch",
|
| 170 |
-
description="Search within the user's uploaded document. Use this tool when you need information from the specific document that was uploaded.",
|
| 171 |
-
func=document_search_tool,
|
| 172 |
-
args_schema=RAGQueryInput
|
| 173 |
-
)
|
| 174 |
-
|
| 175 |
-
# Add document search tool to the tool belt if we have upload capability
|
| 176 |
-
tools = tool_belt.copy()
|
| 177 |
-
tools.append(doc_search_tool)
|
| 178 |
-
|
| 179 |
-
# Create a node for tool execution
|
| 180 |
-
tool_node = ToolNode(tools)
|
| 181 |
-
|
| 182 |
-
# Initialize the graph with our state type
|
| 183 |
-
uncompiled_graph = StateGraph(ResearchAgentState)
|
| 184 |
-
|
| 185 |
-
# Add nodes
|
| 186 |
-
uncompiled_graph.add_node("retrieve", retrieve_from_documents)
|
| 187 |
-
uncompiled_graph.add_node("agent", call_model)
|
| 188 |
-
uncompiled_graph.add_node("action", tool_node)
|
| 189 |
-
|
| 190 |
-
# Set the entry point to retrieve context first
|
| 191 |
-
uncompiled_graph.set_entry_point("retrieve")
|
| 192 |
-
|
| 193 |
-
# Add edges
|
| 194 |
-
uncompiled_graph.add_edge("retrieve", "agent")
|
| 195 |
-
|
| 196 |
-
# Add conditional edges from agent
|
| 197 |
-
uncompiled_graph.add_conditional_edges(
|
| 198 |
-
"agent",
|
| 199 |
-
should_continue,
|
| 200 |
-
{
|
| 201 |
-
"action": "action",
|
| 202 |
-
END: END
|
| 203 |
-
}
|
| 204 |
-
)
|
| 205 |
-
|
| 206 |
-
# Complete the loop
|
| 207 |
-
uncompiled_graph.add_edge("action", "agent")
|
| 208 |
-
|
| 209 |
-
# Compile the graph
|
| 210 |
-
compiled_graph = uncompiled_graph.compile()
|
| 211 |
-
|
| 212 |
-
# Create the full chain
|
| 213 |
-
agent_chain = convert_inputs | compiled_graph
|
| 214 |
-
return agent_chain
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
def retrieve_from_documents(state: Dict[str, Any]) -> Dict[str, str]:
|
| 218 |
-
"""
|
| 219 |
-
Retrieve relevant context from uploaded documents based on the user query.
|
| 220 |
-
|
| 221 |
-
Args:
|
| 222 |
-
state: Current state containing messages and optional documents
|
| 223 |
-
|
| 224 |
-
Returns:
|
| 225 |
-
Updated state with context from document retrieval
|
| 226 |
-
"""
|
| 227 |
-
# Get the last user message
|
| 228 |
-
for message in reversed(state["messages"]):
|
| 229 |
-
if isinstance(message, HumanMessage):
|
| 230 |
-
query = message.content
|
| 231 |
-
break
|
| 232 |
-
else:
|
| 233 |
-
# No user message found
|
| 234 |
-
return {"context": ""}
|
| 235 |
-
|
| 236 |
-
# Skip if no documents are uploaded
|
| 237 |
-
retriever = cl.user_session.get("retriever")
|
| 238 |
-
if not retriever:
|
| 239 |
-
return {"context": ""}
|
| 240 |
-
|
| 241 |
-
try:
|
| 242 |
-
# Retrieve relevant documents
|
| 243 |
-
docs = retriever.invoke(query)
|
| 244 |
-
if not docs:
|
| 245 |
-
return {"context": ""}
|
| 246 |
-
|
| 247 |
-
# Extract text from documents
|
| 248 |
-
context = "\n\n".join([f"Document excerpt: {doc.page_content}" for doc in docs])
|
| 249 |
-
return {"context": context}
|
| 250 |
-
except Exception as e:
|
| 251 |
-
print(f"Error retrieving from documents: {str(e)}")
|
| 252 |
-
return {"context": ""}
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
def document_search_tool(query: str) -> str:
|
| 256 |
-
"""
|
| 257 |
-
Tool function to search within uploaded documents.
|
| 258 |
-
|
| 259 |
-
Args:
|
| 260 |
-
query: Search query string
|
| 261 |
-
|
| 262 |
-
Returns:
|
| 263 |
-
Information retrieved from the documents
|
| 264 |
-
"""
|
| 265 |
-
retriever = cl.user_session.get("retriever")
|
| 266 |
-
if not retriever:
|
| 267 |
-
return "No documents have been uploaded yet. Please upload a document first."
|
| 268 |
-
|
| 269 |
-
docs = retriever.invoke(query)
|
| 270 |
-
if not docs:
|
| 271 |
-
return "No relevant information found in the uploaded documents."
|
| 272 |
-
|
| 273 |
-
# Format the results
|
| 274 |
-
results = []
|
| 275 |
-
for i, doc in enumerate(docs):
|
| 276 |
-
results.append(f"[Document {i+1}] {doc.page_content}")
|
| 277 |
-
|
| 278 |
-
return "\n\n".join(results)
|
| 279 |
-
|
| 280 |
|
| 281 |
@cl.on_chat_start
|
| 282 |
async def on_chat_start():
|
|
@@ -289,8 +29,11 @@ async def on_chat_start():
|
|
| 289 |
content="Welcome to the Research Agent! I can help you research topics using web search, arXiv papers, and documents you upload."
|
| 290 |
).send()
|
| 291 |
|
|
|
|
|
|
|
|
|
|
| 292 |
# Create the agent
|
| 293 |
-
agent = build_agent_chain()
|
| 294 |
|
| 295 |
# Store agent in user session
|
| 296 |
cl.user_session.set("agent", agent)
|
|
@@ -328,7 +71,7 @@ async def main(message):
|
|
| 328 |
with cl.Step(name="Research Process", type="tool") as step:
|
| 329 |
# Run the agent executor with callbacks to stream the response
|
| 330 |
result = await agent_executor.ainvoke(
|
| 331 |
-
{"question"
|
| 332 |
config={
|
| 333 |
"callbacks": [cl.AsyncLangchainCallbackHandler()],
|
| 334 |
"configurable": {"session_id": message.id} # Add session_id from message
|
|
@@ -348,10 +91,9 @@ async def main(message):
|
|
| 348 |
).send()
|
| 349 |
|
| 350 |
# Get the final answer
|
| 351 |
-
final_answer = parse_output(result)
|
| 352 |
|
| 353 |
-
#
|
| 354 |
-
# Instead of using make_async_gen, we'll manually stream tokens from the final_answer
|
| 355 |
await msg.stream_token(final_answer)
|
| 356 |
await msg.send()
|
| 357 |
|
|
@@ -407,8 +149,11 @@ async def process_uploaded_file(file: cl.File, msg: cl.Message):
|
|
| 407 |
# Store the retriever in the user session
|
| 408 |
cl.user_session.set("retriever", retriever)
|
| 409 |
|
| 410 |
-
#
|
| 411 |
-
|
|
|
|
|
|
|
|
|
|
| 412 |
cl.user_session.set("agent", agent)
|
| 413 |
|
| 414 |
# Let the user know that the file is processed
|
|
|
|
| 7 |
from langchain_qdrant import QdrantVectorStore
|
| 8 |
from qdrant_client import QdrantClient
|
| 9 |
from qdrant_client.models import Distance, VectorParams
|
| 10 |
+
from langchain_core.tools import Tool
|
| 11 |
+
from typing import Dict, Any, List, Optional
|
| 12 |
+
from langchain_core.documents import Document
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
from utils.file_processor import process_file
|
| 15 |
from models.rag import LangChainRAG
|
| 16 |
+
from models.research_tools import RAGQueryInput
|
| 17 |
+
from models.search_tools import create_search_tools
|
| 18 |
+
from models.agent import build_agent_chain, parse_output
|
| 19 |
import config
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
@cl.on_chat_start
|
| 22 |
async def on_chat_start():
|
|
|
|
| 29 |
content="Welcome to the Research Agent! I can help you research topics using web search, arXiv papers, and documents you upload."
|
| 30 |
).send()
|
| 31 |
|
| 32 |
+
# Create search tools
|
| 33 |
+
tools = create_search_tools(max_results=config.MAX_TAVILY_SEARCH_RESULTS)
|
| 34 |
+
|
| 35 |
# Create the agent
|
| 36 |
+
agent = build_agent_chain(tools)
|
| 37 |
|
| 38 |
# Store agent in user session
|
| 39 |
cl.user_session.set("agent", agent)
|
|
|
|
| 71 |
with cl.Step(name="Research Process", type="tool") as step:
|
| 72 |
# Run the agent executor with callbacks to stream the response
|
| 73 |
result = await agent_executor.ainvoke(
|
| 74 |
+
{"question": message.content},
|
| 75 |
config={
|
| 76 |
"callbacks": [cl.AsyncLangchainCallbackHandler()],
|
| 77 |
"configurable": {"session_id": message.id} # Add session_id from message
|
|
|
|
| 91 |
).send()
|
| 92 |
|
| 93 |
# Get the final answer
|
| 94 |
+
final_answer = parse_output(result)
|
| 95 |
|
| 96 |
+
# Stream tokens from the final_answer
|
|
|
|
| 97 |
await msg.stream_token(final_answer)
|
| 98 |
await msg.send()
|
| 99 |
|
|
|
|
| 149 |
# Store the retriever in the user session
|
| 150 |
cl.user_session.set("retriever", retriever)
|
| 151 |
|
| 152 |
+
# Get the search tools
|
| 153 |
+
tools = create_search_tools(max_results=config.MAX_TAVILY_SEARCH_RESULTS)
|
| 154 |
+
|
| 155 |
+
# Rebuild the agent with the retriever
|
| 156 |
+
agent = build_agent_chain(tools, retriever)
|
| 157 |
cl.user_session.set("agent", agent)
|
| 158 |
|
| 159 |
# Let the user know that the file is processed
|
models/agent.py
ADDED
|
@@ -0,0 +1,269 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
LangGraph Agent implementation for the Research Agent.
|
| 3 |
+
"""
|
| 4 |
+
from typing import TypedDict, Annotated, Dict, Any, Literal, Union, cast, List, Optional
|
| 5 |
+
|
| 6 |
+
from langchain_openai import ChatOpenAI
|
| 7 |
+
from langchain_core.tools import Tool
|
| 8 |
+
from langchain_core.messages import BaseMessage, SystemMessage, HumanMessage
|
| 9 |
+
from langchain_core.documents import Document
|
| 10 |
+
|
| 11 |
+
from langgraph.graph.message import add_messages
|
| 12 |
+
from langgraph.graph import StateGraph, END
|
| 13 |
+
from langgraph.prebuilt import ToolNode
|
| 14 |
+
from models.research_tools import RAGQueryInput
|
| 15 |
+
|
| 16 |
+
# Define END as a string constant since we can't use it directly in type annotations
|
| 17 |
+
END_STATE = "end"
|
| 18 |
+
|
| 19 |
+
class ResearchAgentState(TypedDict):
|
| 20 |
+
"""
|
| 21 |
+
State definition for the Research Agent using LangGraph.
|
| 22 |
+
|
| 23 |
+
Attributes:
|
| 24 |
+
messages: List of messages in the conversation
|
| 25 |
+
context: Additional context information from RAG retrievals
|
| 26 |
+
documents: Optional list of Document objects from uploaded files
|
| 27 |
+
"""
|
| 28 |
+
messages: Annotated[list[BaseMessage], add_messages]
|
| 29 |
+
context: str
|
| 30 |
+
documents: Optional[List[Document]]
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def call_model(model, state: Dict[str, Any]) -> Dict[str, list[BaseMessage]]:
|
| 34 |
+
"""
|
| 35 |
+
Process the current state through the language model.
|
| 36 |
+
|
| 37 |
+
Args:
|
| 38 |
+
model: Language model with tools bound
|
| 39 |
+
state: Current state containing messages and context
|
| 40 |
+
|
| 41 |
+
Returns:
|
| 42 |
+
Updated state with model's response added to messages
|
| 43 |
+
"""
|
| 44 |
+
try:
|
| 45 |
+
messages = state["messages"]
|
| 46 |
+
context = state.get("context", "")
|
| 47 |
+
|
| 48 |
+
# Add context from documents if available
|
| 49 |
+
if context:
|
| 50 |
+
# Insert system message with context before the latest user message
|
| 51 |
+
context_message = SystemMessage(content=f"Use the following information from uploaded documents to enhance your response if relevant:\n\n{context}")
|
| 52 |
+
|
| 53 |
+
# Find the position of the last user message
|
| 54 |
+
for i in range(len(messages)-1, -1, -1):
|
| 55 |
+
if isinstance(messages[i], HumanMessage):
|
| 56 |
+
# Insert context right after the last user message
|
| 57 |
+
enhanced_messages = messages[:i+1] + [context_message] + messages[i+1:]
|
| 58 |
+
break
|
| 59 |
+
else:
|
| 60 |
+
# No user message found, just append context
|
| 61 |
+
enhanced_messages = messages + [context_message]
|
| 62 |
+
else:
|
| 63 |
+
enhanced_messages = messages
|
| 64 |
+
|
| 65 |
+
# Get response from the model
|
| 66 |
+
response = model.invoke(enhanced_messages)
|
| 67 |
+
return {"messages": [response]}
|
| 68 |
+
except Exception as e:
|
| 69 |
+
# Handle exceptions gracefully
|
| 70 |
+
error_msg = f"Error calling model: {str(e)}"
|
| 71 |
+
print(error_msg) # Log the error
|
| 72 |
+
# Return a fallback response
|
| 73 |
+
return {"messages": [HumanMessage(content=error_msg)]}
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def should_continue(state: Dict[str, Any]) -> Union[Literal["action"], Literal["end"]]:
|
| 77 |
+
"""
|
| 78 |
+
Determine if the agent should continue processing or end.
|
| 79 |
+
|
| 80 |
+
Args:
|
| 81 |
+
state: Current state containing messages and context
|
| 82 |
+
|
| 83 |
+
Returns:
|
| 84 |
+
"action" if tool calls are present, otherwise "end"
|
| 85 |
+
"""
|
| 86 |
+
last_message = state["messages"][-1]
|
| 87 |
+
|
| 88 |
+
if last_message.tool_calls:
|
| 89 |
+
return "action"
|
| 90 |
+
|
| 91 |
+
return "end"
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def retrieve_from_documents(state: Dict[str, Any], retriever) -> Dict[str, str]:
|
| 95 |
+
"""
|
| 96 |
+
Retrieve relevant context from uploaded documents based on the user query.
|
| 97 |
+
|
| 98 |
+
Args:
|
| 99 |
+
state: Current state containing messages and optional documents
|
| 100 |
+
retriever: Document retriever to use
|
| 101 |
+
|
| 102 |
+
Returns:
|
| 103 |
+
Updated state with context from document retrieval
|
| 104 |
+
"""
|
| 105 |
+
# Get the last user message
|
| 106 |
+
for message in reversed(state["messages"]):
|
| 107 |
+
if isinstance(message, HumanMessage):
|
| 108 |
+
query = message.content
|
| 109 |
+
break
|
| 110 |
+
else:
|
| 111 |
+
# No user message found
|
| 112 |
+
return {"context": ""}
|
| 113 |
+
|
| 114 |
+
# Skip if no documents are uploaded
|
| 115 |
+
if not retriever:
|
| 116 |
+
return {"context": ""}
|
| 117 |
+
|
| 118 |
+
try:
|
| 119 |
+
# Retrieve relevant documents
|
| 120 |
+
docs = retriever.invoke(query)
|
| 121 |
+
if not docs:
|
| 122 |
+
return {"context": ""}
|
| 123 |
+
|
| 124 |
+
# Extract text from documents
|
| 125 |
+
context = "\n\n".join([f"Document excerpt: {doc.page_content}" for doc in docs])
|
| 126 |
+
return {"context": context}
|
| 127 |
+
except Exception as e:
|
| 128 |
+
print(f"Error retrieving from documents: {str(e)}")
|
| 129 |
+
return {"context": ""}
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def document_search_tool(retriever, query: str) -> str:
|
| 133 |
+
"""
|
| 134 |
+
Tool function to search within uploaded documents.
|
| 135 |
+
|
| 136 |
+
Args:
|
| 137 |
+
retriever: Document retriever to use
|
| 138 |
+
query: Search query string
|
| 139 |
+
|
| 140 |
+
Returns:
|
| 141 |
+
Information retrieved from the documents
|
| 142 |
+
"""
|
| 143 |
+
if not retriever:
|
| 144 |
+
return "No documents have been uploaded yet. Please upload a document first."
|
| 145 |
+
|
| 146 |
+
docs = retriever.invoke(query)
|
| 147 |
+
if not docs:
|
| 148 |
+
return "No relevant information found in the uploaded documents."
|
| 149 |
+
|
| 150 |
+
# Format the results
|
| 151 |
+
results = []
|
| 152 |
+
for i, doc in enumerate(docs):
|
| 153 |
+
results.append(f"[Document {i+1}] {doc.page_content}")
|
| 154 |
+
|
| 155 |
+
return "\n\n".join(results)
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
def convert_inputs(input_object: Dict[str, str]) -> Dict[str, list[BaseMessage]]:
|
| 159 |
+
"""
|
| 160 |
+
Convert user input into the format expected by the agent.
|
| 161 |
+
|
| 162 |
+
Args:
|
| 163 |
+
input_object: Dictionary containing the user's question
|
| 164 |
+
|
| 165 |
+
Returns:
|
| 166 |
+
Formatted input state for the agent
|
| 167 |
+
"""
|
| 168 |
+
return {"messages": [HumanMessage(content=input_object["question"])]}
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
def parse_output(input_state: Dict[str, Any]) -> str:
|
| 172 |
+
"""
|
| 173 |
+
Extract the final response from the agent's state.
|
| 174 |
+
|
| 175 |
+
Args:
|
| 176 |
+
input_state: The final state of the agent
|
| 177 |
+
|
| 178 |
+
Returns:
|
| 179 |
+
The content of the last message
|
| 180 |
+
"""
|
| 181 |
+
try:
|
| 182 |
+
return cast(str, input_state["messages"][-1].content)
|
| 183 |
+
except (IndexError, KeyError, AttributeError) as e:
|
| 184 |
+
# Handle potential errors when accessing the output
|
| 185 |
+
error_msg = f"Error parsing output: {str(e)}"
|
| 186 |
+
print(error_msg) # Log the error
|
| 187 |
+
return "I encountered an error while processing your request."
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
def build_agent_chain(tools, retriever=None):
|
| 191 |
+
"""
|
| 192 |
+
Constructs and returns the research agent execution chain.
|
| 193 |
+
|
| 194 |
+
The chain consists of:
|
| 195 |
+
1. A retrieval node that gets context from documents
|
| 196 |
+
2. An agent node that processes messages
|
| 197 |
+
3. A tool node that executes tools when called
|
| 198 |
+
|
| 199 |
+
Args:
|
| 200 |
+
tools: List of tools for the agent
|
| 201 |
+
retriever: Optional retriever for document search
|
| 202 |
+
|
| 203 |
+
Returns:
|
| 204 |
+
Compiled agent chain ready for execution
|
| 205 |
+
"""
|
| 206 |
+
# Create an instance of ChatOpenAI
|
| 207 |
+
model = ChatOpenAI(model="gpt-4o", temperature=0)
|
| 208 |
+
model = model.bind_tools(tools)
|
| 209 |
+
|
| 210 |
+
# Create document search tool if retriever is provided
|
| 211 |
+
if retriever:
|
| 212 |
+
doc_search_tool = Tool(
|
| 213 |
+
name="DocumentSearch",
|
| 214 |
+
description="Search within the user's uploaded document. Use this tool when you need information from the specific document that was uploaded.",
|
| 215 |
+
func=lambda query: document_search_tool(retriever, query),
|
| 216 |
+
args_schema=RAGQueryInput
|
| 217 |
+
)
|
| 218 |
+
|
| 219 |
+
# Add document search tool to the tool belt if we have upload capability
|
| 220 |
+
tools = tools.copy()
|
| 221 |
+
tools.append(doc_search_tool)
|
| 222 |
+
|
| 223 |
+
# Create a node for tool execution
|
| 224 |
+
tool_node = ToolNode(tools)
|
| 225 |
+
|
| 226 |
+
# Initialize the graph with our state type
|
| 227 |
+
uncompiled_graph = StateGraph(ResearchAgentState)
|
| 228 |
+
|
| 229 |
+
# Define model node factory with bound model
|
| 230 |
+
def call_model_node(state):
|
| 231 |
+
return call_model(model, state)
|
| 232 |
+
|
| 233 |
+
# Add nodes
|
| 234 |
+
if retriever:
|
| 235 |
+
# Define retrieval node factory with bound retriever
|
| 236 |
+
def retrieve_node(state):
|
| 237 |
+
return retrieve_from_documents(state, retriever)
|
| 238 |
+
|
| 239 |
+
uncompiled_graph.add_node("retrieve", retrieve_node)
|
| 240 |
+
uncompiled_graph.set_entry_point("retrieve")
|
| 241 |
+
uncompiled_graph.add_edge("retrieve", "agent")
|
| 242 |
+
else:
|
| 243 |
+
uncompiled_graph.set_entry_point("agent")
|
| 244 |
+
|
| 245 |
+
uncompiled_graph.add_node("agent", call_model_node)
|
| 246 |
+
uncompiled_graph.add_node("action", tool_node)
|
| 247 |
+
|
| 248 |
+
# Add an end node - this is required for the "end" state to be valid
|
| 249 |
+
uncompiled_graph.add_node("end", lambda state: state)
|
| 250 |
+
|
| 251 |
+
# Add conditional edges from agent
|
| 252 |
+
uncompiled_graph.add_conditional_edges(
|
| 253 |
+
"agent",
|
| 254 |
+
should_continue,
|
| 255 |
+
{
|
| 256 |
+
"action": "action",
|
| 257 |
+
"end": END
|
| 258 |
+
}
|
| 259 |
+
)
|
| 260 |
+
|
| 261 |
+
# Complete the loop
|
| 262 |
+
uncompiled_graph.add_edge("action", "agent")
|
| 263 |
+
|
| 264 |
+
# Compile the graph
|
| 265 |
+
compiled_graph = uncompiled_graph.compile()
|
| 266 |
+
|
| 267 |
+
# Create the full chain
|
| 268 |
+
agent_chain = convert_inputs | compiled_graph
|
| 269 |
+
return agent_chain
|
models/research_tools.py
CHANGED
|
@@ -1,148 +1,28 @@
|
|
| 1 |
"""
|
| 2 |
Research tools implementation for the agent.
|
| 3 |
|
| 4 |
-
This module implements
|
| 5 |
-
that will be used by the research agent.
|
| 6 |
"""
|
| 7 |
-
import
|
| 8 |
-
from
|
| 9 |
-
from langchain.agents import tool
|
| 10 |
-
from langchain_core.tools import Tool
|
| 11 |
-
from pydantic import BaseModel, Field # Updated import from pydantic directly
|
| 12 |
-
|
| 13 |
-
from langchain_openai import ChatOpenAI
|
| 14 |
-
from langchain_community.tools.tavily_search import TavilySearchResults
|
| 15 |
-
from langchain_community.utilities.arxiv import ArxivAPIWrapper
|
| 16 |
|
| 17 |
-
import
|
| 18 |
-
from models.rag import LangChainRAG
|
| 19 |
|
| 20 |
class ArxivQueryInput(BaseModel):
|
| 21 |
"""Input for arXiv query."""
|
| 22 |
query: str = Field(..., description="The search query to find papers on arXiv")
|
| 23 |
-
max_results: int = Field(default=
|
| 24 |
|
| 25 |
class RAGQueryInput(BaseModel):
|
| 26 |
"""Input for RAG query."""
|
| 27 |
query: str = Field(..., description="The query to search in the uploaded document")
|
| 28 |
|
| 29 |
-
|
| 30 |
-
"""
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
print("Warning: TAVILY_API_KEY environment variable not set. Web search functionality may be limited.")
|
| 34 |
-
|
| 35 |
-
return TavilySearchResults(max_results=config.MAX_TAVILY_SEARCH_RESULTS)
|
| 36 |
-
|
| 37 |
-
@tool
|
| 38 |
-
def arxiv_search(query: str, max_results: int = config.MAX_ARXIV_SEARCH_RESULTS) -> str:
|
| 39 |
-
"""
|
| 40 |
-
Search for papers on arXiv.
|
| 41 |
-
|
| 42 |
-
Args:
|
| 43 |
-
query: The search query string
|
| 44 |
-
max_results: Maximum number of results to return
|
| 45 |
-
|
| 46 |
-
Returns:
|
| 47 |
-
A string summary of the search results
|
| 48 |
-
"""
|
| 49 |
-
client = ArxivAPIWrapper(
|
| 50 |
-
top_k_results=max_results,
|
| 51 |
-
ARXIV_MAX_QUERY_LENGTH=300,
|
| 52 |
-
load_max_docs=max_results,
|
| 53 |
-
load_all_available_meta=True
|
| 54 |
-
)
|
| 55 |
-
|
| 56 |
-
try:
|
| 57 |
-
results = client.run(query)
|
| 58 |
-
if not results:
|
| 59 |
-
return "No papers found on arXiv for this query."
|
| 60 |
-
|
| 61 |
-
formatted_results = []
|
| 62 |
-
for idx, result in enumerate(results.split("\n\n")):
|
| 63 |
-
if result.strip():
|
| 64 |
-
formatted_results.append(f"[{idx+1}] {result.strip()}")
|
| 65 |
-
|
| 66 |
-
return "\n\n".join(formatted_results)
|
| 67 |
-
except Exception as e:
|
| 68 |
-
return f"Error searching arXiv: {str(e)}"
|
| 69 |
|
| 70 |
-
class
|
| 71 |
-
"""
|
| 72 |
-
|
| 73 |
-
""
|
| 74 |
-
def __init__(self, rag_chain: Optional[LangChainRAG] = None):
|
| 75 |
-
"""
|
| 76 |
-
Initialize the research toolkit.
|
| 77 |
-
|
| 78 |
-
Args:
|
| 79 |
-
rag_chain: Optional RAG chain instance
|
| 80 |
-
"""
|
| 81 |
-
self.rag_chain = rag_chain
|
| 82 |
-
self.tools = self._create_tools()
|
| 83 |
-
|
| 84 |
-
def _create_tools(self) -> List[Tool]:
|
| 85 |
-
"""
|
| 86 |
-
Create the tools for the agent.
|
| 87 |
-
|
| 88 |
-
Returns:
|
| 89 |
-
List of tools
|
| 90 |
-
"""
|
| 91 |
-
tools = [
|
| 92 |
-
create_tavily_search_tool(),
|
| 93 |
-
Tool(
|
| 94 |
-
name="ArxivSearch",
|
| 95 |
-
description="Search for scientific papers on arXiv. Use this tool when you need academic or scientific information.",
|
| 96 |
-
func=arxiv_search,
|
| 97 |
-
args_schema=ArxivQueryInput
|
| 98 |
-
)
|
| 99 |
-
]
|
| 100 |
-
|
| 101 |
-
# Add RAG tool if available
|
| 102 |
-
if self.rag_chain:
|
| 103 |
-
@tool
|
| 104 |
-
def document_rag_search(query: str) -> str:
|
| 105 |
-
"""
|
| 106 |
-
Search the uploaded document using RAG.
|
| 107 |
-
|
| 108 |
-
Args:
|
| 109 |
-
query: The search query string
|
| 110 |
-
|
| 111 |
-
Returns:
|
| 112 |
-
The response from the RAG model
|
| 113 |
-
"""
|
| 114 |
-
docs = self.rag_chain.retriever.invoke(query)
|
| 115 |
-
context = "\n\n".join([doc.page_content for doc in docs])
|
| 116 |
-
response = self.rag_chain.chain.invoke(query)
|
| 117 |
-
|
| 118 |
-
return f"Based on the uploaded document: {response}"
|
| 119 |
-
|
| 120 |
-
tools.append(
|
| 121 |
-
Tool(
|
| 122 |
-
name="DocumentSearch",
|
| 123 |
-
description="Search within the user's uploaded document. Use this tool when you need information from the specific document that was uploaded.",
|
| 124 |
-
func=document_rag_search,
|
| 125 |
-
args_schema=RAGQueryInput
|
| 126 |
-
)
|
| 127 |
-
)
|
| 128 |
-
|
| 129 |
-
return tools
|
| 130 |
-
|
| 131 |
-
def get_tools(self) -> List[Tool]:
|
| 132 |
-
"""
|
| 133 |
-
Get the list of tools.
|
| 134 |
-
|
| 135 |
-
Returns:
|
| 136 |
-
List of tools
|
| 137 |
-
"""
|
| 138 |
-
return self.tools
|
| 139 |
-
|
| 140 |
-
def set_rag_chain(self, rag_chain: LangChainRAG):
|
| 141 |
-
"""
|
| 142 |
-
Update the RAG chain and rebuild tools.
|
| 143 |
-
|
| 144 |
-
Args:
|
| 145 |
-
rag_chain: New RAG chain instance
|
| 146 |
-
"""
|
| 147 |
-
self.rag_chain = rag_chain
|
| 148 |
-
self.tools = self._create_tools()
|
|
|
|
| 1 |
"""
|
| 2 |
Research tools implementation for the agent.
|
| 3 |
|
| 4 |
+
This module implements input schemas and tools specifically for research purposes.
|
|
|
|
| 5 |
"""
|
| 6 |
+
from typing import List, Optional
|
| 7 |
+
from pydantic import BaseModel, Field
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
+
from langchain_core.tools import Tool
|
|
|
|
| 10 |
|
| 11 |
class ArxivQueryInput(BaseModel):
    """Input for arXiv query.

    Pydantic schema used as the ``args_schema`` of the arXiv search tool.
    """
    # Required free-text query string.
    query: str = Field(..., description="The search query to find papers on arXiv")
    # Upper bound on returned papers; defaults to 5.
    max_results: int = Field(default=5, description="The maximum number of results to return")
|
| 15 |
|
| 16 |
class RAGQueryInput(BaseModel):
    """Input for RAG query.

    Pydantic schema used as the ``args_schema`` of the document search tool.
    """
    # Required question to run against the uploaded document's index.
    query: str = Field(..., description="The query to search in the uploaded document")
|
| 19 |
|
| 20 |
+
class WebSearchInput(BaseModel):
    """Input for web search.

    Pydantic schema for general web-search tools (e.g. Tavily, DuckDuckGo).
    """
    # Required free-text query string.
    query: str = Field(..., description="The search query for web search")
    # Upper bound on returned results; defaults to 5.
    max_results: int = Field(default=5, description="The maximum number of results to return")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
+
class DocumentAnalysisInput(BaseModel):
    """Input for document analysis.

    Pydantic schema for tools that answer targeted questions about an
    uploaded document.
    """
    # Required question to analyze within the document.
    query: str = Field(..., description="The specific question to analyze in the document")
    # When True, the answer should cite supporting passages.
    include_citations: bool = Field(default=True, description="Whether to include citations in the response")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
models/search_tools.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Search tools module containing different search implementations.
|
| 3 |
+
"""
|
| 4 |
+
from langchain_community.tools.tavily_search import TavilySearchResults
|
| 5 |
+
from langchain_community.tools.arxiv.tool import ArxivQueryRun
|
| 6 |
+
from langchain_community.tools import DuckDuckGoSearchResults
|
| 7 |
+
from langchain_core.tools import Tool
|
| 8 |
+
|
| 9 |
+
def create_search_tools(max_results=5):
    """
    Create search tools for the research agent.

    Args:
        max_results: Maximum number of results each search tool should return

    Returns:
        List of search tools for the agent: Tavily, DuckDuckGo, and arXiv
    """
    # Tavily accepts ``max_results`` directly.
    tavily_tool = TavilySearchResults(max_results=max_results)
    # BUG FIX: DuckDuckGoSearchResults declares ``num_results``, not
    # ``max_results``. The previous ``max_results=`` kwarg was silently
    # ignored (pydantic drops undeclared extras), so the tool always used
    # its built-in default result count.
    duckduckgo_tool = DuckDuckGoSearchResults(num_results=max_results)
    # ArxivQueryRun bundles its own ArxivAPIWrapper with default settings.
    arxiv_tool = ArxivQueryRun()

    return [
        tavily_tool,
        duckduckgo_tool,
        arxiv_tool,
    ]
|
pyproject.toml
CHANGED
|
@@ -8,6 +8,7 @@ dependencies = [
|
|
| 8 |
"arxiv>=2.2.0",
|
| 9 |
"chainlit==2.0.4",
|
| 10 |
"duckduckgo-search>=8.0.1",
|
|
|
|
| 11 |
"langchain>=0.3.23",
|
| 12 |
"langchain-community>=0.3.21",
|
| 13 |
"langchain-core>=0.3.54",
|
|
@@ -16,6 +17,8 @@ dependencies = [
|
|
| 16 |
"langchain-qdrant>=0.2.0",
|
| 17 |
"langchain-text-splitters>=0.3.8",
|
| 18 |
"langgraph>=0.3.31",
|
|
|
|
|
|
|
| 19 |
"numpy==2.2.2",
|
| 20 |
"openai==1.59.9",
|
| 21 |
"pydantic==2.10.1",
|
|
|
|
| 8 |
"arxiv>=2.2.0",
|
| 9 |
"chainlit==2.0.4",
|
| 10 |
"duckduckgo-search>=8.0.1",
|
| 11 |
+
"feedparser>=6.0.11",
|
| 12 |
"langchain>=0.3.23",
|
| 13 |
"langchain-community>=0.3.21",
|
| 14 |
"langchain-core>=0.3.54",
|
|
|
|
| 17 |
"langchain-qdrant>=0.2.0",
|
| 18 |
"langchain-text-splitters>=0.3.8",
|
| 19 |
"langgraph>=0.3.31",
|
| 20 |
+
"listparser>=0.20",
|
| 21 |
+
"newspaper3k>=0.2.8",
|
| 22 |
"numpy==2.2.2",
|
| 23 |
"openai==1.59.9",
|
| 24 |
"pydantic==2.10.1",
|
utils/file_processor.py
CHANGED
|
@@ -4,24 +4,65 @@ Utilities for processing uploaded files.
|
|
| 4 |
import os
|
| 5 |
import tempfile
|
| 6 |
import shutil
|
| 7 |
-
from typing import List
|
|
|
|
| 8 |
|
| 9 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 10 |
-
from langchain_community.document_loaders import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
from chainlit.types import AskFileResponse
|
| 12 |
|
| 13 |
import config
|
| 14 |
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
"""
|
| 26 |
Process an uploaded file and split it into text chunks.
|
| 27 |
|
|
@@ -29,28 +70,34 @@ def process_file(file: AskFileResponse):
|
|
| 29 |
file: The uploaded file response from Chainlit
|
| 30 |
|
| 31 |
Returns:
|
| 32 |
-
List of document chunks
|
| 33 |
"""
|
| 34 |
print(f"Processing file: {file.name}")
|
| 35 |
|
| 36 |
# Create a temporary file with the correct extension
|
| 37 |
suffix = f".{file.name.split('.')[-1]}"
|
| 38 |
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
|
| 39 |
-
# Copy the uploaded file content to the temporary file
|
| 40 |
-
shutil.copyfile(file.path, temp_file.name)
|
| 41 |
-
print(f"Created temporary file at: {temp_file.name}")
|
| 42 |
-
|
| 43 |
try:
|
| 44 |
-
#
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
|
|
|
| 49 |
|
| 50 |
-
# Load
|
| 51 |
documents = loader.load()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
texts = text_splitter.split_documents(documents)
|
|
|
|
| 53 |
return texts
|
|
|
|
|
|
|
|
|
|
| 54 |
finally:
|
| 55 |
# Clean up the temporary file
|
| 56 |
try:
|
|
|
|
| 4 |
import os
|
| 5 |
import tempfile
|
| 6 |
import shutil
|
| 7 |
+
from typing import List, Optional
|
| 8 |
+
from pathlib import Path
|
| 9 |
|
| 10 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 11 |
+
from langchain_community.document_loaders import (
|
| 12 |
+
PyPDFLoader,
|
| 13 |
+
TextLoader,
|
| 14 |
+
CSVLoader,
|
| 15 |
+
UnstructuredExcelLoader,
|
| 16 |
+
Docx2txtLoader
|
| 17 |
+
)
|
| 18 |
+
from langchain_core.documents import Document
|
| 19 |
from chainlit.types import AskFileResponse
|
| 20 |
|
| 21 |
import config
|
| 22 |
|
| 23 |
+
def get_document_loader(file_path: str):
    """
    Get appropriate document loader based on file extension.

    Args:
        file_path: Path to the file

    Returns:
        Document loader instance
    """
    # Map each supported extension to its loader class; anything not
    # listed falls back to a plain-text loader.
    loader_by_extension = {
        '.pdf': PyPDFLoader,
        '.txt': TextLoader,
        '.md': TextLoader,
        '.py': TextLoader,
        '.csv': CSVLoader,
        '.xlsx': UnstructuredExcelLoader,
        '.xls': UnstructuredExcelLoader,
        '.docx': Docx2txtLoader,
        # NOTE(review): Docx2txtLoader is docx-oriented — confirm legacy
        # .doc files actually load through it.
        '.doc': Docx2txtLoader,
    }

    extension = Path(file_path).suffix.lower()
    loader_cls = loader_by_extension.get(extension, TextLoader)
    return loader_cls(file_path)
|
| 49 |
|
| 50 |
+
def create_text_splitter():
    """
    Create a text splitter with the configured settings.

    Returns:
        Initialized text splitter
    """
    # All tuning knobs come from the project-wide config module.
    splitter_options = {
        "chunk_size": config.CHUNK_SIZE,
        "chunk_overlap": config.CHUNK_OVERLAP,
        "length_function": len,
        "is_separator_regex": False,
        "separators": config.SEPARATORS,
    }
    return RecursiveCharacterTextSplitter(**splitter_options)
|
| 64 |
+
|
| 65 |
+
def process_file(file: AskFileResponse) -> Optional[List[Document]]:
|
| 66 |
"""
|
| 67 |
Process an uploaded file and split it into text chunks.
|
| 68 |
|
|
|
|
| 70 |
file: The uploaded file response from Chainlit
|
| 71 |
|
| 72 |
Returns:
|
| 73 |
+
List of document chunks or None if processing fails
|
| 74 |
"""
|
| 75 |
print(f"Processing file: {file.name}")
|
| 76 |
|
| 77 |
# Create a temporary file with the correct extension
|
| 78 |
suffix = f".{file.name.split('.')[-1]}"
|
| 79 |
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
try:
|
| 81 |
+
# Copy the uploaded file content to the temporary file
|
| 82 |
+
shutil.copyfile(file.path, temp_file.name)
|
| 83 |
+
print(f"Created temporary file at: {temp_file.name}")
|
| 84 |
+
|
| 85 |
+
# Get the appropriate loader
|
| 86 |
+
loader = get_document_loader(temp_file.name)
|
| 87 |
|
| 88 |
+
# Load documents
|
| 89 |
documents = loader.load()
|
| 90 |
+
|
| 91 |
+
# Initialize text splitter
|
| 92 |
+
text_splitter = create_text_splitter()
|
| 93 |
+
|
| 94 |
+
# Split documents into chunks
|
| 95 |
texts = text_splitter.split_documents(documents)
|
| 96 |
+
|
| 97 |
return texts
|
| 98 |
+
except Exception as e:
|
| 99 |
+
print(f"Error processing file: {e}")
|
| 100 |
+
return None
|
| 101 |
finally:
|
| 102 |
# Clean up the temporary file
|
| 103 |
try:
|
uv.lock
CHANGED
|
@@ -10,6 +10,7 @@ dependencies = [
|
|
| 10 |
{ name = "arxiv" },
|
| 11 |
{ name = "chainlit" },
|
| 12 |
{ name = "duckduckgo-search" },
|
|
|
|
| 13 |
{ name = "langchain" },
|
| 14 |
{ name = "langchain-community" },
|
| 15 |
{ name = "langchain-core" },
|
|
@@ -18,6 +19,8 @@ dependencies = [
|
|
| 18 |
{ name = "langchain-qdrant" },
|
| 19 |
{ name = "langchain-text-splitters" },
|
| 20 |
{ name = "langgraph" },
|
|
|
|
|
|
|
| 21 |
{ name = "numpy" },
|
| 22 |
{ name = "openai" },
|
| 23 |
{ name = "pydantic" },
|
|
@@ -33,6 +36,7 @@ requires-dist = [
|
|
| 33 |
{ name = "arxiv", specifier = ">=2.2.0" },
|
| 34 |
{ name = "chainlit", specifier = "==2.0.4" },
|
| 35 |
{ name = "duckduckgo-search", specifier = ">=8.0.1" },
|
|
|
|
| 36 |
{ name = "langchain", specifier = ">=0.3.23" },
|
| 37 |
{ name = "langchain-community", specifier = ">=0.3.21" },
|
| 38 |
{ name = "langchain-core", specifier = ">=0.3.54" },
|
|
@@ -41,6 +45,8 @@ requires-dist = [
|
|
| 41 |
{ name = "langchain-qdrant", specifier = ">=0.2.0" },
|
| 42 |
{ name = "langchain-text-splitters", specifier = ">=0.3.8" },
|
| 43 |
{ name = "langgraph", specifier = ">=0.3.31" },
|
|
|
|
|
|
|
| 44 |
{ name = "numpy", specifier = "==2.2.2" },
|
| 45 |
{ name = "openai", specifier = "==1.59.9" },
|
| 46 |
{ name = "pydantic", specifier = "==2.10.1" },
|
|
@@ -170,6 +176,19 @@ wheels = [
|
|
| 170 |
{ url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815 },
|
| 171 |
]
|
| 172 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
[[package]]
|
| 174 |
name = "bidict"
|
| 175 |
version = "0.23.1"
|
|
@@ -295,6 +314,15 @@ wheels = [
|
|
| 295 |
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 },
|
| 296 |
]
|
| 297 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
[[package]]
|
| 299 |
name = "dataclasses-json"
|
| 300 |
version = "0.6.7"
|
|
@@ -357,6 +385,17 @@ wheels = [
|
|
| 357 |
{ url = "https://files.pythonhosted.org/packages/50/b3/b51f09c2ba432a576fe63758bddc81f78f0c6309d9e5c10d194313bf021e/fastapi-0.115.12-py3-none-any.whl", hash = "sha256:e94613d6c05e27be7ffebdd6ea5f388112e5e430c8f7d6494a9d1d88d43e814d", size = 95164 },
|
| 358 |
]
|
| 359 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 360 |
[[package]]
|
| 361 |
name = "feedparser"
|
| 362 |
version = "6.0.11"
|
|
@@ -369,6 +408,15 @@ wheels = [
|
|
| 369 |
{ url = "https://files.pythonhosted.org/packages/7c/d4/8c31aad9cc18f451c49f7f9cfb5799dadffc88177f7917bc90a66459b1d7/feedparser-6.0.11-py3-none-any.whl", hash = "sha256:0be7ee7b395572b19ebeb1d6aafb0028dee11169f1c934e0ed67d54992f4ad45", size = 81343 },
|
| 370 |
]
|
| 371 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 372 |
[[package]]
|
| 373 |
name = "filetype"
|
| 374 |
version = "1.2.0"
|
|
@@ -602,6 +650,12 @@ wheels = [
|
|
| 602 |
{ url = "https://files.pythonhosted.org/packages/79/9d/0fb148dc4d6fa4a7dd1d8378168d9b4cd8d4560a6fbf6f0121c5fc34eb68/importlib_metadata-8.6.1-py3-none-any.whl", hash = "sha256:02a89390c1e15fdfdc0d7c6b25cb3e62650d0494005c97d6f148bf5b9787525e", size = 26971 },
|
| 603 |
]
|
| 604 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 605 |
[[package]]
|
| 606 |
name = "jiter"
|
| 607 |
version = "0.9.0"
|
|
@@ -625,6 +679,15 @@ wheels = [
|
|
| 625 |
{ url = "https://files.pythonhosted.org/packages/ee/47/3729f00f35a696e68da15d64eb9283c330e776f3b5789bac7f2c0c4df209/jiter-0.9.0-cp313-cp313t-win_amd64.whl", hash = "sha256:6f7838bc467ab7e8ef9f387bd6de195c43bad82a569c1699cb822f6609dd4cdf", size = 206867 },
|
| 626 |
]
|
| 627 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 628 |
[[package]]
|
| 629 |
name = "jsonpatch"
|
| 630 |
version = "1.33"
|
|
@@ -840,6 +903,15 @@ wheels = [
|
|
| 840 |
{ url = "https://files.pythonhosted.org/packages/03/a5/866b44697cee47d1cae429ed370281d937ad4439f71af82a6baaa139d26a/Lazify-0.4.0-py2.py3-none-any.whl", hash = "sha256:c2c17a7a33e9406897e3f66fde4cd3f84716218d580330e5af10cfe5a0cd195a", size = 3107 },
|
| 841 |
]
|
| 842 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 843 |
[[package]]
|
| 844 |
name = "literalai"
|
| 845 |
version = "0.1.103"
|
|
@@ -950,6 +1022,45 @@ wheels = [
|
|
| 950 |
{ url = "https://files.pythonhosted.org/packages/a0/c4/c2971a3ba4c6103a3d10c4b0f24f461ddc027f0f09763220cf35ca1401b3/nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c", size = 5195 },
|
| 951 |
]
|
| 952 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 953 |
[[package]]
|
| 954 |
name = "numpy"
|
| 955 |
version = "2.2.2"
|
|
@@ -1172,6 +1283,36 @@ wheels = [
|
|
| 1172 |
{ url = "https://files.pythonhosted.org/packages/ec/1a/610693ac4ee14fcdf2d9bf3c493370e4f2ef7ae2e19217d7a237ff42367d/packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7", size = 53011 },
|
| 1173 |
]
|
| 1174 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1175 |
[[package]]
|
| 1176 |
name = "portalocker"
|
| 1177 |
version = "2.10.1"
|
|
@@ -1344,6 +1485,18 @@ wheels = [
|
|
| 1344 |
{ url = "https://files.pythonhosted.org/packages/8e/5e/c86a5643653825d3c913719e788e41386bee415c2b87b4f955432f2de6b2/pypdf2-3.0.1-py3-none-any.whl", hash = "sha256:d16e4205cfee272fbdc0568b68d82be796540b1537508cef59388f839c191928", size = 232572 },
|
| 1345 |
]
|
| 1346 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1347 |
[[package]]
|
| 1348 |
name = "python-dotenv"
|
| 1349 |
version = "1.1.0"
|
|
@@ -1470,6 +1623,18 @@ wheels = [
|
|
| 1470 |
{ url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 },
|
| 1471 |
]
|
| 1472 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1473 |
[[package]]
|
| 1474 |
name = "requests-toolbelt"
|
| 1475 |
version = "1.0.0"
|
|
@@ -1509,6 +1674,15 @@ wheels = [
|
|
| 1509 |
{ url = "https://files.pythonhosted.org/packages/52/59/0782e51887ac6b07ffd1570e0364cf901ebc36345fea669969d2084baebb/simple_websocket-1.1.0-py3-none-any.whl", hash = "sha256:4af6069630a38ed6c561010f0e11a5bc0d4ca569b36306eb257cd9a192497c8c", size = 13842 },
|
| 1510 |
]
|
| 1511 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1512 |
[[package]]
|
| 1513 |
name = "sniffio"
|
| 1514 |
version = "1.3.1"
|
|
@@ -1518,6 +1692,15 @@ wheels = [
|
|
| 1518 |
{ url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 },
|
| 1519 |
]
|
| 1520 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1521 |
[[package]]
|
| 1522 |
name = "sqlalchemy"
|
| 1523 |
version = "2.0.40"
|
|
@@ -1598,6 +1781,27 @@ wheels = [
|
|
| 1598 |
{ url = "https://files.pythonhosted.org/packages/de/a8/8f499c179ec900783ffe133e9aab10044481679bb9aad78436d239eee716/tiktoken-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:5ea0edb6f83dc56d794723286215918c1cde03712cbbafa0348b33448faf5b95", size = 894669 },
|
| 1599 |
]
|
| 1600 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1601 |
[[package]]
|
| 1602 |
name = "tomli"
|
| 1603 |
version = "2.2.1"
|
|
|
|
| 10 |
{ name = "arxiv" },
|
| 11 |
{ name = "chainlit" },
|
| 12 |
{ name = "duckduckgo-search" },
|
| 13 |
+
{ name = "feedparser" },
|
| 14 |
{ name = "langchain" },
|
| 15 |
{ name = "langchain-community" },
|
| 16 |
{ name = "langchain-core" },
|
|
|
|
| 19 |
{ name = "langchain-qdrant" },
|
| 20 |
{ name = "langchain-text-splitters" },
|
| 21 |
{ name = "langgraph" },
|
| 22 |
+
{ name = "listparser" },
|
| 23 |
+
{ name = "newspaper3k" },
|
| 24 |
{ name = "numpy" },
|
| 25 |
{ name = "openai" },
|
| 26 |
{ name = "pydantic" },
|
|
|
|
| 36 |
{ name = "arxiv", specifier = ">=2.2.0" },
|
| 37 |
{ name = "chainlit", specifier = "==2.0.4" },
|
| 38 |
{ name = "duckduckgo-search", specifier = ">=8.0.1" },
|
| 39 |
+
{ name = "feedparser", specifier = ">=6.0.11" },
|
| 40 |
{ name = "langchain", specifier = ">=0.3.23" },
|
| 41 |
{ name = "langchain-community", specifier = ">=0.3.21" },
|
| 42 |
{ name = "langchain-core", specifier = ">=0.3.54" },
|
|
|
|
| 45 |
{ name = "langchain-qdrant", specifier = ">=0.2.0" },
|
| 46 |
{ name = "langchain-text-splitters", specifier = ">=0.3.8" },
|
| 47 |
{ name = "langgraph", specifier = ">=0.3.31" },
|
| 48 |
+
{ name = "listparser", specifier = ">=0.20" },
|
| 49 |
+
{ name = "newspaper3k", specifier = ">=0.2.8" },
|
| 50 |
{ name = "numpy", specifier = "==2.2.2" },
|
| 51 |
{ name = "openai", specifier = "==1.59.9" },
|
| 52 |
{ name = "pydantic", specifier = "==2.10.1" },
|
|
|
|
| 176 |
{ url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815 },
|
| 177 |
]
|
| 178 |
|
| 179 |
+
[[package]]
|
| 180 |
+
name = "beautifulsoup4"
|
| 181 |
+
version = "4.13.4"
|
| 182 |
+
source = { registry = "https://pypi.org/simple" }
|
| 183 |
+
dependencies = [
|
| 184 |
+
{ name = "soupsieve" },
|
| 185 |
+
{ name = "typing-extensions" },
|
| 186 |
+
]
|
| 187 |
+
sdist = { url = "https://files.pythonhosted.org/packages/d8/e4/0c4c39e18fd76d6a628d4dd8da40543d136ce2d1752bd6eeeab0791f4d6b/beautifulsoup4-4.13.4.tar.gz", hash = "sha256:dbb3c4e1ceae6aefebdaf2423247260cd062430a410e38c66f2baa50a8437195", size = 621067 }
|
| 188 |
+
wheels = [
|
| 189 |
+
{ url = "https://files.pythonhosted.org/packages/50/cd/30110dc0ffcf3b131156077b90e9f60ed75711223f306da4db08eff8403b/beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b", size = 187285 },
|
| 190 |
+
]
|
| 191 |
+
|
| 192 |
[[package]]
|
| 193 |
name = "bidict"
|
| 194 |
version = "0.23.1"
|
|
|
|
| 314 |
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 },
|
| 315 |
]
|
| 316 |
|
| 317 |
+
[[package]]
|
| 318 |
+
name = "cssselect"
|
| 319 |
+
version = "1.3.0"
|
| 320 |
+
source = { registry = "https://pypi.org/simple" }
|
| 321 |
+
sdist = { url = "https://files.pythonhosted.org/packages/72/0a/c3ea9573b1dc2e151abfe88c7fe0c26d1892fe6ed02d0cdb30f0d57029d5/cssselect-1.3.0.tar.gz", hash = "sha256:57f8a99424cfab289a1b6a816a43075a4b00948c86b4dcf3ef4ee7e15f7ab0c7", size = 42870 }
|
| 322 |
+
wheels = [
|
| 323 |
+
{ url = "https://files.pythonhosted.org/packages/ee/58/257350f7db99b4ae12b614a36256d9cc870d71d9e451e79c2dc3b23d7c3c/cssselect-1.3.0-py3-none-any.whl", hash = "sha256:56d1bf3e198080cc1667e137bc51de9cadfca259f03c2d4e09037b3e01e30f0d", size = 18786 },
|
| 324 |
+
]
|
| 325 |
+
|
| 326 |
[[package]]
|
| 327 |
name = "dataclasses-json"
|
| 328 |
version = "0.6.7"
|
|
|
|
| 385 |
{ url = "https://files.pythonhosted.org/packages/50/b3/b51f09c2ba432a576fe63758bddc81f78f0c6309d9e5c10d194313bf021e/fastapi-0.115.12-py3-none-any.whl", hash = "sha256:e94613d6c05e27be7ffebdd6ea5f388112e5e430c8f7d6494a9d1d88d43e814d", size = 95164 },
|
| 386 |
]
|
| 387 |
|
| 388 |
+
[[package]]
|
| 389 |
+
name = "feedfinder2"
|
| 390 |
+
version = "0.0.4"
|
| 391 |
+
source = { registry = "https://pypi.org/simple" }
|
| 392 |
+
dependencies = [
|
| 393 |
+
{ name = "beautifulsoup4" },
|
| 394 |
+
{ name = "requests" },
|
| 395 |
+
{ name = "six" },
|
| 396 |
+
]
|
| 397 |
+
sdist = { url = "https://files.pythonhosted.org/packages/35/82/1251fefec3bb4b03fd966c7e7f7a41c9fc2bb00d823a34c13f847fd61406/feedfinder2-0.0.4.tar.gz", hash = "sha256:3701ee01a6c85f8b865a049c30ba0b4608858c803fe8e30d1d289fdbe89d0efe", size = 3297 }
|
| 398 |
+
|
| 399 |
[[package]]
|
| 400 |
name = "feedparser"
|
| 401 |
version = "6.0.11"
|
|
|
|
| 408 |
{ url = "https://files.pythonhosted.org/packages/7c/d4/8c31aad9cc18f451c49f7f9cfb5799dadffc88177f7917bc90a66459b1d7/feedparser-6.0.11-py3-none-any.whl", hash = "sha256:0be7ee7b395572b19ebeb1d6aafb0028dee11169f1c934e0ed67d54992f4ad45", size = 81343 },
|
| 409 |
]
|
| 410 |
|
| 411 |
+
[[package]]
|
| 412 |
+
name = "filelock"
|
| 413 |
+
version = "3.18.0"
|
| 414 |
+
source = { registry = "https://pypi.org/simple" }
|
| 415 |
+
sdist = { url = "https://files.pythonhosted.org/packages/0a/10/c23352565a6544bdc5353e0b15fc1c563352101f30e24bf500207a54df9a/filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2", size = 18075 }
|
| 416 |
+
wheels = [
|
| 417 |
+
{ url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215 },
|
| 418 |
+
]
|
| 419 |
+
|
| 420 |
[[package]]
|
| 421 |
name = "filetype"
|
| 422 |
version = "1.2.0"
|
|
|
|
| 650 |
{ url = "https://files.pythonhosted.org/packages/79/9d/0fb148dc4d6fa4a7dd1d8378168d9b4cd8d4560a6fbf6f0121c5fc34eb68/importlib_metadata-8.6.1-py3-none-any.whl", hash = "sha256:02a89390c1e15fdfdc0d7c6b25cb3e62650d0494005c97d6f148bf5b9787525e", size = 26971 },
|
| 651 |
]
|
| 652 |
|
| 653 |
+
[[package]]
|
| 654 |
+
name = "jieba3k"
|
| 655 |
+
version = "0.35.1"
|
| 656 |
+
source = { registry = "https://pypi.org/simple" }
|
| 657 |
+
sdist = { url = "https://files.pythonhosted.org/packages/a9/cb/2c8332bcdc14d33b0bedd18ae0a4981a069c3513e445120da3c3f23a8aaa/jieba3k-0.35.1.zip", hash = "sha256:980a4f2636b778d312518066be90c7697d410dd5a472385f5afced71a2db1c10", size = 7423646 }
|
| 658 |
+
|
| 659 |
[[package]]
|
| 660 |
name = "jiter"
|
| 661 |
version = "0.9.0"
|
|
|
|
| 679 |
{ url = "https://files.pythonhosted.org/packages/ee/47/3729f00f35a696e68da15d64eb9283c330e776f3b5789bac7f2c0c4df209/jiter-0.9.0-cp313-cp313t-win_amd64.whl", hash = "sha256:6f7838bc467ab7e8ef9f387bd6de195c43bad82a569c1699cb822f6609dd4cdf", size = 206867 },
|
| 680 |
]
|
| 681 |
|
| 682 |
+
[[package]]
|
| 683 |
+
name = "joblib"
|
| 684 |
+
version = "1.4.2"
|
| 685 |
+
source = { registry = "https://pypi.org/simple" }
|
| 686 |
+
sdist = { url = "https://files.pythonhosted.org/packages/64/33/60135848598c076ce4b231e1b1895170f45fbcaeaa2c9d5e38b04db70c35/joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e", size = 2116621 }
|
| 687 |
+
wheels = [
|
| 688 |
+
{ url = "https://files.pythonhosted.org/packages/91/29/df4b9b42f2be0b623cbd5e2140cafcaa2bef0759a00b7b70104dcfe2fb51/joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6", size = 301817 },
|
| 689 |
+
]
|
| 690 |
+
|
| 691 |
[[package]]
|
| 692 |
name = "jsonpatch"
|
| 693 |
version = "1.33"
|
|
|
|
| 903 |
{ url = "https://files.pythonhosted.org/packages/03/a5/866b44697cee47d1cae429ed370281d937ad4439f71af82a6baaa139d26a/Lazify-0.4.0-py2.py3-none-any.whl", hash = "sha256:c2c17a7a33e9406897e3f66fde4cd3f84716218d580330e5af10cfe5a0cd195a", size = 3107 },
|
| 904 |
]
|
| 905 |
|
| 906 |
+
[[package]]
|
| 907 |
+
name = "listparser"
|
| 908 |
+
version = "0.20"
|
| 909 |
+
source = { registry = "https://pypi.org/simple" }
|
| 910 |
+
sdist = { url = "https://files.pythonhosted.org/packages/be/ee/d9f02600955ca34baf73e824d64b181b412745ed448a0ad1a92cef81115b/listparser-0.20.tar.gz", hash = "sha256:0dda5b41ca9531fc3c438eb4abf4d8a7cf03ef050d196875993e897a66c1f885", size = 12404 }
|
| 911 |
+
wheels = [
|
| 912 |
+
{ url = "https://files.pythonhosted.org/packages/c9/27/bd96818acce8ed1909dff29817096016f5e958ef646a377b34d55afa23b3/listparser-0.20-py3-none-any.whl", hash = "sha256:5daae9895b75191a77b14f5b8eabf7a63a4ca440f215d9bd8d8e5a2eccde02ce", size = 14149 },
|
| 913 |
+
]
|
| 914 |
+
|
| 915 |
[[package]]
|
| 916 |
name = "literalai"
|
| 917 |
version = "0.1.103"
|
|
|
|
| 1022 |
{ url = "https://files.pythonhosted.org/packages/a0/c4/c2971a3ba4c6103a3d10c4b0f24f461ddc027f0f09763220cf35ca1401b3/nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c", size = 5195 },
|
| 1023 |
]
|
| 1024 |
|
| 1025 |
+
[[package]]
|
| 1026 |
+
name = "newspaper3k"
|
| 1027 |
+
version = "0.2.8"
|
| 1028 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1029 |
+
dependencies = [
|
| 1030 |
+
{ name = "beautifulsoup4" },
|
| 1031 |
+
{ name = "cssselect" },
|
| 1032 |
+
{ name = "feedfinder2" },
|
| 1033 |
+
{ name = "feedparser" },
|
| 1034 |
+
{ name = "jieba3k" },
|
| 1035 |
+
{ name = "lxml" },
|
| 1036 |
+
{ name = "nltk" },
|
| 1037 |
+
{ name = "pillow" },
|
| 1038 |
+
{ name = "python-dateutil" },
|
| 1039 |
+
{ name = "pyyaml" },
|
| 1040 |
+
{ name = "requests" },
|
| 1041 |
+
{ name = "tinysegmenter" },
|
| 1042 |
+
{ name = "tldextract" },
|
| 1043 |
+
]
|
| 1044 |
+
sdist = { url = "https://files.pythonhosted.org/packages/ce/fb/8f8525be0cafa48926e85b0c06a7cb3e2a892d340b8036f8c8b1b572df1c/newspaper3k-0.2.8.tar.gz", hash = "sha256:9f1bd3e1fb48f400c715abf875cc7b0a67b7ddcd87f50c9aeeb8fcbbbd9004fb", size = 205685 }
|
| 1045 |
+
wheels = [
|
| 1046 |
+
{ url = "https://files.pythonhosted.org/packages/d7/b9/51afecb35bb61b188a4b44868001de348a0e8134b4dfa00ffc191567c4b9/newspaper3k-0.2.8-py3-none-any.whl", hash = "sha256:44a864222633d3081113d1030615991c3dbba87239f6bbf59d91240f71a22e3e", size = 211132 },
|
| 1047 |
+
]
|
| 1048 |
+
|
| 1049 |
+
[[package]]
|
| 1050 |
+
name = "nltk"
|
| 1051 |
+
version = "3.9.1"
|
| 1052 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1053 |
+
dependencies = [
|
| 1054 |
+
{ name = "click" },
|
| 1055 |
+
{ name = "joblib" },
|
| 1056 |
+
{ name = "regex" },
|
| 1057 |
+
{ name = "tqdm" },
|
| 1058 |
+
]
|
| 1059 |
+
sdist = { url = "https://files.pythonhosted.org/packages/3c/87/db8be88ad32c2d042420b6fd9ffd4a149f9a0d7f0e86b3f543be2eeeedd2/nltk-3.9.1.tar.gz", hash = "sha256:87d127bd3de4bd89a4f81265e5fa59cb1b199b27440175370f7417d2bc7ae868", size = 2904691 }
|
| 1060 |
+
wheels = [
|
| 1061 |
+
{ url = "https://files.pythonhosted.org/packages/4d/66/7d9e26593edda06e8cb531874633f7c2372279c3b0f46235539fe546df8b/nltk-3.9.1-py3-none-any.whl", hash = "sha256:4fa26829c5b00715afe3061398a8989dc643b92ce7dd93fb4585a70930d168a1", size = 1505442 },
|
| 1062 |
+
]
|
| 1063 |
+
|
| 1064 |
[[package]]
|
| 1065 |
name = "numpy"
|
| 1066 |
version = "2.2.2"
|
|
|
|
| 1283 |
{ url = "https://files.pythonhosted.org/packages/ec/1a/610693ac4ee14fcdf2d9bf3c493370e4f2ef7ae2e19217d7a237ff42367d/packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7", size = 53011 },
|
| 1284 |
]
|
| 1285 |
|
| 1286 |
+
[[package]]
|
| 1287 |
+
name = "pillow"
|
| 1288 |
+
version = "11.2.1"
|
| 1289 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1290 |
+
sdist = { url = "https://files.pythonhosted.org/packages/af/cb/bb5c01fcd2a69335b86c22142b2bccfc3464087efb7fd382eee5ffc7fdf7/pillow-11.2.1.tar.gz", hash = "sha256:a64dd61998416367b7ef979b73d3a85853ba9bec4c2925f74e588879a58716b6", size = 47026707 }
|
| 1291 |
+
wheels = [
|
| 1292 |
+
{ url = "https://files.pythonhosted.org/packages/36/9c/447528ee3776e7ab8897fe33697a7ff3f0475bb490c5ac1456a03dc57956/pillow-11.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:fdec757fea0b793056419bca3e9932eb2b0ceec90ef4813ea4c1e072c389eb28", size = 3190098 },
|
| 1293 |
+
{ url = "https://files.pythonhosted.org/packages/b5/09/29d5cd052f7566a63e5b506fac9c60526e9ecc553825551333e1e18a4858/pillow-11.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b0e130705d568e2f43a17bcbe74d90958e8a16263868a12c3e0d9c8162690830", size = 3030166 },
|
| 1294 |
+
{ url = "https://files.pythonhosted.org/packages/71/5d/446ee132ad35e7600652133f9c2840b4799bbd8e4adba881284860da0a36/pillow-11.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bdb5e09068332578214cadd9c05e3d64d99e0e87591be22a324bdbc18925be0", size = 4408674 },
|
| 1295 |
+
{ url = "https://files.pythonhosted.org/packages/69/5f/cbe509c0ddf91cc3a03bbacf40e5c2339c4912d16458fcb797bb47bcb269/pillow-11.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d189ba1bebfbc0c0e529159631ec72bb9e9bc041f01ec6d3233d6d82eb823bc1", size = 4496005 },
|
| 1296 |
+
{ url = "https://files.pythonhosted.org/packages/f9/b3/dd4338d8fb8a5f312021f2977fb8198a1184893f9b00b02b75d565c33b51/pillow-11.2.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:191955c55d8a712fab8934a42bfefbf99dd0b5875078240943f913bb66d46d9f", size = 4518707 },
|
| 1297 |
+
{ url = "https://files.pythonhosted.org/packages/13/eb/2552ecebc0b887f539111c2cd241f538b8ff5891b8903dfe672e997529be/pillow-11.2.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:ad275964d52e2243430472fc5d2c2334b4fc3ff9c16cb0a19254e25efa03a155", size = 4610008 },
|
| 1298 |
+
{ url = "https://files.pythonhosted.org/packages/72/d1/924ce51bea494cb6e7959522d69d7b1c7e74f6821d84c63c3dc430cbbf3b/pillow-11.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:750f96efe0597382660d8b53e90dd1dd44568a8edb51cb7f9d5d918b80d4de14", size = 4585420 },
|
| 1299 |
+
{ url = "https://files.pythonhosted.org/packages/43/ab/8f81312d255d713b99ca37479a4cb4b0f48195e530cdc1611990eb8fd04b/pillow-11.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fe15238d3798788d00716637b3d4e7bb6bde18b26e5d08335a96e88564a36b6b", size = 4667655 },
|
| 1300 |
+
{ url = "https://files.pythonhosted.org/packages/94/86/8f2e9d2dc3d308dfd137a07fe1cc478df0a23d42a6c4093b087e738e4827/pillow-11.2.1-cp313-cp313-win32.whl", hash = "sha256:3fe735ced9a607fee4f481423a9c36701a39719252a9bb251679635f99d0f7d2", size = 2332329 },
|
| 1301 |
+
{ url = "https://files.pythonhosted.org/packages/6d/ec/1179083b8d6067a613e4d595359b5fdea65d0a3b7ad623fee906e1b3c4d2/pillow-11.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:74ee3d7ecb3f3c05459ba95eed5efa28d6092d751ce9bf20e3e253a4e497e691", size = 2676388 },
|
| 1302 |
+
{ url = "https://files.pythonhosted.org/packages/23/f1/2fc1e1e294de897df39fa8622d829b8828ddad938b0eaea256d65b84dd72/pillow-11.2.1-cp313-cp313-win_arm64.whl", hash = "sha256:5119225c622403afb4b44bad4c1ca6c1f98eed79db8d3bc6e4e160fc6339d66c", size = 2414950 },
|
| 1303 |
+
{ url = "https://files.pythonhosted.org/packages/c4/3e/c328c48b3f0ead7bab765a84b4977acb29f101d10e4ef57a5e3400447c03/pillow-11.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8ce2e8411c7aaef53e6bb29fe98f28cd4fbd9a1d9be2eeea434331aac0536b22", size = 3192759 },
|
| 1304 |
+
{ url = "https://files.pythonhosted.org/packages/18/0e/1c68532d833fc8b9f404d3a642991441d9058eccd5606eab31617f29b6d4/pillow-11.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:9ee66787e095127116d91dea2143db65c7bb1e232f617aa5957c0d9d2a3f23a7", size = 3033284 },
|
| 1305 |
+
{ url = "https://files.pythonhosted.org/packages/b7/cb/6faf3fb1e7705fd2db74e070f3bf6f88693601b0ed8e81049a8266de4754/pillow-11.2.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9622e3b6c1d8b551b6e6f21873bdcc55762b4b2126633014cea1803368a9aa16", size = 4445826 },
|
| 1306 |
+
{ url = "https://files.pythonhosted.org/packages/07/94/8be03d50b70ca47fb434a358919d6a8d6580f282bbb7af7e4aa40103461d/pillow-11.2.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63b5dff3a68f371ea06025a1a6966c9a1e1ee452fc8020c2cd0ea41b83e9037b", size = 4527329 },
|
| 1307 |
+
{ url = "https://files.pythonhosted.org/packages/fd/a4/bfe78777076dc405e3bd2080bc32da5ab3945b5a25dc5d8acaa9de64a162/pillow-11.2.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:31df6e2d3d8fc99f993fd253e97fae451a8db2e7207acf97859732273e108406", size = 4549049 },
|
| 1308 |
+
{ url = "https://files.pythonhosted.org/packages/65/4d/eaf9068dc687c24979e977ce5677e253624bd8b616b286f543f0c1b91662/pillow-11.2.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:062b7a42d672c45a70fa1f8b43d1d38ff76b63421cbbe7f88146b39e8a558d91", size = 4635408 },
|
| 1309 |
+
{ url = "https://files.pythonhosted.org/packages/1d/26/0fd443365d9c63bc79feb219f97d935cd4b93af28353cba78d8e77b61719/pillow-11.2.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4eb92eca2711ef8be42fd3f67533765d9fd043b8c80db204f16c8ea62ee1a751", size = 4614863 },
|
| 1310 |
+
{ url = "https://files.pythonhosted.org/packages/49/65/dca4d2506be482c2c6641cacdba5c602bc76d8ceb618fd37de855653a419/pillow-11.2.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f91ebf30830a48c825590aede79376cb40f110b387c17ee9bd59932c961044f9", size = 4692938 },
|
| 1311 |
+
{ url = "https://files.pythonhosted.org/packages/b3/92/1ca0c3f09233bd7decf8f7105a1c4e3162fb9142128c74adad0fb361b7eb/pillow-11.2.1-cp313-cp313t-win32.whl", hash = "sha256:e0b55f27f584ed623221cfe995c912c61606be8513bfa0e07d2c674b4516d9dd", size = 2335774 },
|
| 1312 |
+
{ url = "https://files.pythonhosted.org/packages/a5/ac/77525347cb43b83ae905ffe257bbe2cc6fd23acb9796639a1f56aa59d191/pillow-11.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:36d6b82164c39ce5482f649b437382c0fb2395eabc1e2b1702a6deb8ad647d6e", size = 2681895 },
|
| 1313 |
+
{ url = "https://files.pythonhosted.org/packages/67/32/32dc030cfa91ca0fc52baebbba2e009bb001122a1daa8b6a79ad830b38d3/pillow-11.2.1-cp313-cp313t-win_arm64.whl", hash = "sha256:225c832a13326e34f212d2072982bb1adb210e0cc0b153e688743018c94a2681", size = 2417234 },
|
| 1314 |
+
]
|
| 1315 |
+
|
| 1316 |
[[package]]
|
| 1317 |
name = "portalocker"
|
| 1318 |
version = "2.10.1"
|
|
|
|
| 1485 |
{ url = "https://files.pythonhosted.org/packages/8e/5e/c86a5643653825d3c913719e788e41386bee415c2b87b4f955432f2de6b2/pypdf2-3.0.1-py3-none-any.whl", hash = "sha256:d16e4205cfee272fbdc0568b68d82be796540b1537508cef59388f839c191928", size = 232572 },
|
| 1486 |
]
|
| 1487 |
|
| 1488 |
+
[[package]]
|
| 1489 |
+
name = "python-dateutil"
|
| 1490 |
+
version = "2.9.0.post0"
|
| 1491 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1492 |
+
dependencies = [
|
| 1493 |
+
{ name = "six" },
|
| 1494 |
+
]
|
| 1495 |
+
sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432 }
|
| 1496 |
+
wheels = [
|
| 1497 |
+
{ url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 },
|
| 1498 |
+
]
|
| 1499 |
+
|
| 1500 |
[[package]]
|
| 1501 |
name = "python-dotenv"
|
| 1502 |
version = "1.1.0"
|
|
|
|
| 1623 |
{ url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 },
|
| 1624 |
]
|
| 1625 |
|
| 1626 |
+
[[package]]
|
| 1627 |
+
name = "requests-file"
|
| 1628 |
+
version = "2.1.0"
|
| 1629 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1630 |
+
dependencies = [
|
| 1631 |
+
{ name = "requests" },
|
| 1632 |
+
]
|
| 1633 |
+
sdist = { url = "https://files.pythonhosted.org/packages/72/97/bf44e6c6bd8ddbb99943baf7ba8b1a8485bcd2fe0e55e5708d7fee4ff1ae/requests_file-2.1.0.tar.gz", hash = "sha256:0f549a3f3b0699415ac04d167e9cb39bccfb730cb832b4d20be3d9867356e658", size = 6891 }
|
| 1634 |
+
wheels = [
|
| 1635 |
+
{ url = "https://files.pythonhosted.org/packages/d7/25/dd878a121fcfdf38f52850f11c512e13ec87c2ea72385933818e5b6c15ce/requests_file-2.1.0-py2.py3-none-any.whl", hash = "sha256:cf270de5a4c5874e84599fc5778303d496c10ae5e870bfa378818f35d21bda5c", size = 4244 },
|
| 1636 |
+
]
|
| 1637 |
+
|
| 1638 |
[[package]]
|
| 1639 |
name = "requests-toolbelt"
|
| 1640 |
version = "1.0.0"
|
|
|
|
| 1674 |
{ url = "https://files.pythonhosted.org/packages/52/59/0782e51887ac6b07ffd1570e0364cf901ebc36345fea669969d2084baebb/simple_websocket-1.1.0-py3-none-any.whl", hash = "sha256:4af6069630a38ed6c561010f0e11a5bc0d4ca569b36306eb257cd9a192497c8c", size = 13842 },
|
| 1675 |
]
|
| 1676 |
|
| 1677 |
+
[[package]]
|
| 1678 |
+
name = "six"
|
| 1679 |
+
version = "1.17.0"
|
| 1680 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1681 |
+
sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031 }
|
| 1682 |
+
wheels = [
|
| 1683 |
+
{ url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 },
|
| 1684 |
+
]
|
| 1685 |
+
|
| 1686 |
[[package]]
|
| 1687 |
name = "sniffio"
|
| 1688 |
version = "1.3.1"
|
|
|
|
| 1692 |
{ url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 },
|
| 1693 |
]
|
| 1694 |
|
| 1695 |
+
[[package]]
|
| 1696 |
+
name = "soupsieve"
|
| 1697 |
+
version = "2.6"
|
| 1698 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1699 |
+
sdist = { url = "https://files.pythonhosted.org/packages/d7/ce/fbaeed4f9fb8b2daa961f90591662df6a86c1abf25c548329a86920aedfb/soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb", size = 101569 }
|
| 1700 |
+
wheels = [
|
| 1701 |
+
{ url = "https://files.pythonhosted.org/packages/d1/c2/fe97d779f3ef3b15f05c94a2f1e3d21732574ed441687474db9d342a7315/soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9", size = 36186 },
|
| 1702 |
+
]
|
| 1703 |
+
|
| 1704 |
[[package]]
|
| 1705 |
name = "sqlalchemy"
|
| 1706 |
version = "2.0.40"
|
|
|
|
| 1781 |
{ url = "https://files.pythonhosted.org/packages/de/a8/8f499c179ec900783ffe133e9aab10044481679bb9aad78436d239eee716/tiktoken-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:5ea0edb6f83dc56d794723286215918c1cde03712cbbafa0348b33448faf5b95", size = 894669 },
|
| 1782 |
]
|
| 1783 |
|
| 1784 |
+
[[package]]
|
| 1785 |
+
name = "tinysegmenter"
|
| 1786 |
+
version = "0.3"
|
| 1787 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1788 |
+
sdist = { url = "https://files.pythonhosted.org/packages/17/82/86982e4b6d16e4febc79c2a1d68ee3b707e8a020c5d2bc4af8052d0f136a/tinysegmenter-0.3.tar.gz", hash = "sha256:ed1f6d2e806a4758a73be589754384cbadadc7e1a414c81a166fc9adf2d40c6d", size = 16893 }
|
| 1789 |
+
|
| 1790 |
+
[[package]]
|
| 1791 |
+
name = "tldextract"
|
| 1792 |
+
version = "5.2.0"
|
| 1793 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1794 |
+
dependencies = [
|
| 1795 |
+
{ name = "filelock" },
|
| 1796 |
+
{ name = "idna" },
|
| 1797 |
+
{ name = "requests" },
|
| 1798 |
+
{ name = "requests-file" },
|
| 1799 |
+
]
|
| 1800 |
+
sdist = { url = "https://files.pythonhosted.org/packages/20/7a/e469c4f71231a848492da31a7be6921a6cd04ecc8eed58e924bece0fb6de/tldextract-5.2.0.tar.gz", hash = "sha256:c3a8c4daf2c25a57f54d6ef6762aeac7eff5ac3da04cdb607130be757b8457ab", size = 126839 }
|
| 1801 |
+
wheels = [
|
| 1802 |
+
{ url = "https://files.pythonhosted.org/packages/5e/20/b400e99827439eb91d5aa283e09d43e7e46aba66b07edf6f09404cb741da/tldextract-5.2.0-py3-none-any.whl", hash = "sha256:59509cbf99628c9440f4d19d3a1fd8488d50297ea23879c136576263c5a04eba", size = 106308 },
|
| 1803 |
+
]
|
| 1804 |
+
|
| 1805 |
[[package]]
|
| 1806 |
name = "tomli"
|
| 1807 |
version = "2.2.1"
|