panos-span commited on
Commit
daba587
·
verified ·
1 Parent(s): 49b6a36

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +416 -190
agent.py CHANGED
@@ -1,214 +1,440 @@
1
- """LangGraph Agent"""
2
- import os
3
- from dotenv import load_dotenv
4
- from langgraph.graph import START, StateGraph, MessagesState
5
- from langgraph.prebuilt import tools_condition
6
- from langgraph.prebuilt import ToolNode
7
- from langchain_google_genai import ChatGoogleGenerativeAI
8
- from langchain_groq import ChatGroq
9
- from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
10
- from langchain_community.tools.tavily_search import TavilySearchResults
11
- from langchain_community.document_loaders import WikipediaLoader
12
- from langchain_community.document_loaders import ArxivLoader
13
- from langchain_community.vectorstores import SupabaseVectorStore
14
- from langchain_core.messages import SystemMessage, HumanMessage
15
- from langchain_core.tools import tool
16
- from langchain.tools.retriever import create_retriever_tool
17
- from supabase.client import Client, create_client
18
 
19
- load_dotenv()
 
 
 
 
 
20
 
21
@tool
def multiply(a: int, b: int) -> int:
    """Multiply two numbers.

    Args:
        a: first int
        b: second int
    """
    product = a * b
    return product
30
 
31
@tool
def add(a: int, b: int) -> int:
    """Add two numbers.

    Args:
        a: first int
        b: second int
    """
    total = a + b
    return total
40
-
41
@tool
def subtract(a: int, b: int) -> int:
    """Subtract two numbers.

    Args:
        a: first int
        b: second int
    """
    difference = a - b
    return difference
50
 
51
@tool
def divide(a: int, b: int) -> float:
    """Divide two numbers.

    Args:
        a: first int
        b: second int

    Returns:
        The quotient a / b. Note: Python `/` is true division, so the
        result is a float — the original `-> int` annotation was wrong.

    Raises:
        ValueError: if b is zero.
    """
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    return a / b
62
-
63
@tool
def modulus(a: int, b: int) -> int:
    """Get the modulus of two numbers.

    Args:
        a: first int
        b: second int
    """
    remainder = a % b
    return remainder
72
-
73
@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query.

    Returns:
        The matching documents formatted as a single string. The original
        returned a dict, contradicting the `-> str` annotation; tool output
        is fed back to the LLM as text, so a plain string is correct.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
        for doc in search_docs
    )
    return formatted_search_docs
86
-
87
@tool
def web_search(query: str) -> str:
    """Search Tavily for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        The results formatted as a single string.

    Note: TavilySearchResults returns a list of dicts (keys like "url" and
    "content"), not Document objects — the original accessed `doc.metadata`
    / `doc.page_content`, which raises AttributeError. It also passed the
    input as `invoke(query=query)`; the tool input is positional.
    """
    results = TavilySearchResults(max_results=3).invoke(query)
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{res.get("url", "")}"/>\n{res.get("content", "")}\n</Document>'
        for res in results
    )
    return formatted_search_docs
100
-
101
@tool
def arvix_search(query: str) -> str:
    """Search Arxiv for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        The matching documents (first 1000 chars each) as one string. The
        original returned a dict, contradicting the `-> str` annotation.

    NOTE(review): the name "arvix" is a typo for "arxiv" but is kept — it is
    the tool's public name referenced in the module-level `tools` list.
    """
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
        for doc in search_docs
    )
    return formatted_search_docs
114
-
115
-
116
-
117
# load the system prompt from the file
with open("system_prompt.txt", "r", encoding="utf-8") as f:
    system_prompt = f.read()

# System message
sys_msg = SystemMessage(content=system_prompt)

# build a retriever over the Supabase-backed vector store
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")  # dim=768
supabase: Client = create_client(
    os.environ.get("SUPABASE_URL"),
    os.environ.get("SUPABASE_SERVICE_KEY"))
vector_store = SupabaseVectorStore(
    client=supabase,
    embedding=embeddings,
    table_name="documents",
    query_name="match_documents_langchain",
)
# FIX: the original assigned this to `create_retriever_tool`, shadowing the
# imported factory function and making it unusable afterwards.
retriever_tool = create_retriever_tool(
    retriever=vector_store.as_retriever(),
    name="Question Search",
    description="A tool to retrieve similar questions from a vector store.",
)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
 
 
141
 
 
142
 
143
# All tools exposed to the LLM: arithmetic helpers plus the three search tools.
tools = [
    multiply, add, subtract, divide, modulus,
    wiki_search, web_search, arvix_search,
]
153
 
154
# Build graph function
def build_graph(provider: str = "groq"):
    """Build the LangGraph agent graph for the chosen LLM provider.

    Args:
        provider: one of "google", "groq" (default) or "huggingface".

    Raises:
        ValueError: for any other provider string.
    """
    if provider == "google":
        # Google Gemini
        llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
    elif provider == "groq":
        # Groq https://console.groq.com/docs/models
        llm = ChatGroq(model="qwen-qwq-32b", temperature=0)  # optional : qwen-qwq-32b gemma2-9b-it
    elif provider == "huggingface":
        # TODO: Add huggingface endpoint
        llm = ChatHuggingFace(
            llm=HuggingFaceEndpoint(
                url="https://api-inference.huggingface.co/models/Meta-DeepLearning/llama-2-7b-chat-hf",
                temperature=0,
            ),
        )
    else:
        raise ValueError("Invalid provider. Choose 'google', 'groq' or 'huggingface'.")

    # Every provider shares the same tool belt.
    llm_with_tools = llm.bind_tools(tools)

    def assistant(state: MessagesState):
        """Assistant node: let the tool-aware LLM answer."""
        return {"messages": [llm_with_tools.invoke(state["messages"])]}

    def retriever(state: MessagesState):
        """Retriever node: prepend the system prompt plus a similar Q/A example."""
        similar_question = vector_store.similarity_search(state["messages"][0].content)
        example_msg = HumanMessage(
            content=f"Here I provide a similar question and answer for reference: \n\n{similar_question[0].page_content}",
        )
        return {"messages": [sys_msg] + state["messages"] + [example_msg]}

    # Wire the nodes: START -> retriever -> assistant, with a tools loop.
    builder = StateGraph(MessagesState)
    builder.add_node("retriever", retriever)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))
    builder.add_edge(START, "retriever")
    builder.add_edge("retriever", "assistant")
    builder.add_conditional_edges("assistant", tools_condition)
    builder.add_edge("tools", "assistant")

    # Compile graph
    return builder.compile()
 
 
 
 
 
 
 
204
 
205
# test
if __name__ == "__main__":
    question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
    # Build the graph
    graph = build_graph(provider="groq")
    # Run the graph and print the full message trace
    result = graph.invoke({"messages": [HumanMessage(content=question)]})
    for m in result["messages"]:
        m.pretty_print()
 
1
+ """
2
+ Modified agent.py - Fixed with Hugging Face models instead of OpenAI
3
+ Fixes LangSmith authentication and missing PostgreSQL function issues
4
+ """
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
+ import os
7
+ import logging
8
+ import warnings
9
+ from typing import List, Dict, Any, Optional, Union
10
+ import pandas as pd
11
+ from supabase import create_client, Client
12
 
13
+ # Suppress LangSmith warnings to avoid authentication errors
14
+ warnings.filterwarnings("ignore", category=UserWarning, module="langsmith")
15
+ logging.getLogger("langsmith").setLevel(logging.ERROR)
16
 
17
+ # Disable LangSmith tracing to avoid 401 errors
18
+ os.environ["LANGCHAIN_TRACING_V2"] = "false"
 
 
 
19
 
20
+ try:
21
+ from langchain.agents import AgentType, AgentExecutor, create_react_agent
22
+ from langchain.tools import BaseTool, tool
23
+ from langchain.memory import ConversationBufferMemory
24
+ from langchain_community.llms import HuggingFacePipeline
25
+ from langchain_community.embeddings import HuggingFaceEmbeddings
26
+ from langchain_community.vectorstores import SupabaseVectorStore
27
+ from langchain.schema import Document
28
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
29
+ from langchain.chains import RetrievalQA
30
+ from langchain.prompts import PromptTemplate
31
+ from langchain_core.prompts import ChatPromptTemplate
32
 
33
+ # Hugging Face specific imports
34
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
35
+ from sentence_transformers import SentenceTransformer
36
+ import torch
 
 
 
 
 
37
 
38
+ except ImportError as e:
39
+ print(f"Import error: {e}")
40
+ print("Please install required packages: pip install transformers sentence-transformers torch")
 
 
41
 
42
class RobotPaiAgent:
    """
    RobotPai Agent using Hugging Face models instead of OpenAI.

    Fixes LangSmith authentication and missing PostgreSQL function issues.
    """

    def __init__(self, model_name: str = "microsoft/DialoGPT-medium"):
        """Bootstrap the agent; each setup stage degrades gracefully on failure."""
        print("🤖 Initializing RobotPai Agent with Hugging Face models...")
        self.model_name = model_name
        # Order matters: env -> db client -> models -> store -> tools -> agent.
        self.setup_environment()
        self.setup_supabase()
        self.setup_models()
        self.setup_vectorstore()
        self.setup_tools()
        self.setup_agent()
 
 
 
 
57
 
58
+ def setup_environment(self):
59
+ """Setup environment variables with error handling"""
60
+ # Disable LangSmith to avoid authentication errors
61
+ os.environ["LANGCHAIN_TRACING_V2"] = "false"
62
+
63
+ # Required environment variables
64
+ self.supabase_url = os.getenv("SUPABASE_URL")
65
+ self.supabase_key = os.getenv("SUPABASE_SERVICE_ROLE_KEY")
66
+
67
+ if not all([self.supabase_url, self.supabase_key]):
68
+ raise ValueError("Missing required environment variables: SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY")
69
+
70
+ print(" Environment configured")
71
 
72
+ def setup_supabase(self):
73
+ """Setup Supabase client and ensure database setup"""
74
+ try:
75
+ self.supabase_client: Client = create_client(self.supabase_url, self.supabase_key)
76
+ self.ensure_database_setup()
77
+ print(" Supabase client initialized")
78
+ except Exception as e:
79
+ print(f"⚠️ Supabase setup failed: {e}")
80
+ self.supabase_client = None
81
+
82
+ def ensure_database_setup(self):
83
+ """Ensure the database has required tables and functions"""
84
+ try:
85
+ # Check if documents table exists
86
+ result = self.supabase_client.table('documents').select('id').limit(1).execute()
87
+ print("✅ Documents table exists")
88
+ except Exception as e:
89
+ print(f"⚠️ Database setup needed: {e}")
90
+ print("Please run the SQL setup in your Supabase dashboard:")
91
+ print("""
92
+ -- Enable pgvector extension
93
+ CREATE EXTENSION IF NOT EXISTS vector;
94
 
95
+ -- Create documents table
96
+ CREATE TABLE IF NOT EXISTS documents (
97
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
98
+ content TEXT NOT NULL,
99
+ metadata JSONB DEFAULT '{}',
100
+ embedding VECTOR(384) -- Dimension for sentence-transformers
101
+ );
102
 
103
+ -- Create match_documents_langchain function
104
+ CREATE OR REPLACE FUNCTION match_documents_langchain(
105
+ query_embedding VECTOR(384),
106
+ match_count INT DEFAULT 10,
107
+ filter JSONB DEFAULT '{}'
 
 
 
 
 
108
  )
109
+ RETURNS TABLE (
110
+ id UUID,
111
+ content TEXT,
112
+ metadata JSONB,
113
+ similarity FLOAT
114
  )
115
+ LANGUAGE plpgsql
116
+ AS $$
117
+ BEGIN
118
+ RETURN QUERY
119
+ SELECT
120
+ documents.id,
121
+ documents.content,
122
+ documents.metadata,
123
+ 1 - (documents.embedding <=> query_embedding) AS similarity
124
+ FROM documents
125
+ WHERE documents.metadata @> filter
126
+ ORDER BY documents.embedding <=> query_embedding
127
+ LIMIT match_count;
128
+ END;
129
+ $$;
130
+ """)
131
+
132
+ def setup_models(self):
133
+ """Setup Hugging Face models for LLM and embeddings"""
134
+ try:
135
+ # Setup embeddings using sentence-transformers (faster and smaller)
136
+ print("🔄 Loading embedding model...")
137
+ self.embeddings = HuggingFaceEmbeddings(
138
+ model_name="all-MiniLM-L6-v2", # 384 dimensions, fast and good quality
139
+ model_kwargs={'device': 'cpu'}, # Use CPU for compatibility
140
+ encode_kwargs={'normalize_embeddings': True}
141
+ )
142
+ print("✅ Embeddings model loaded")
143
+
144
+ # Setup LLM using a lightweight model suitable for HF Spaces
145
+ print("🔄 Loading language model...")
146
+
147
+ # Use a smaller, faster model for Hugging Face Spaces
148
+ model_id = "microsoft/DialoGPT-small" # Smaller model for faster inference
149
+
150
+ try:
151
+ # Create a text generation pipeline
152
+ self.llm_pipeline = pipeline(
153
+ "text-generation",
154
+ model=model_id,
155
+ tokenizer=model_id,
156
+ max_length=512,
157
+ temperature=0.7,
158
+ do_sample=True,
159
+ device_map="auto" if torch.cuda.is_available() else None,
160
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
161
+ )
162
+
163
+ # Wrap in LangChain HuggingFacePipeline
164
+ self.llm = HuggingFacePipeline(
165
+ pipeline=self.llm_pipeline,
166
+ model_kwargs={"temperature": 0.7, "max_length": 512}
167
+ )
168
+ print(f"✅ Language model loaded: {model_id}")
169
+
170
+ except Exception as e:
171
+ print(f"⚠️ Failed to load {model_id}: {e}")
172
+ # Fallback to a simple text completion
173
+ print("🔄 Using fallback model...")
174
+ self.llm = self._create_fallback_llm()
175
+
176
+ except Exception as e:
177
+ print(f"❌ Model setup failed: {e}")
178
+ # Create minimal fallback
179
+ self.embeddings = None
180
+ self.llm = self._create_fallback_llm()
181
+
182
+ def _create_fallback_llm(self):
183
+ """Create a simple fallback LLM for when models fail to load"""
184
+ class SimpleLLM:
185
+ def __call__(self, prompt: str) -> str:
186
+ return f"I'm a simple AI assistant. You asked: {prompt[:100]}... I would help you search documents and analyze data, but I need proper model setup."
187
+
188
+ def invoke(self, prompt: str) -> str:
189
+ return self.__call__(prompt)
190
+
191
+ return SimpleLLM()
192
+
193
+ def setup_vectorstore(self):
194
+ """Setup vector store with proper error handling"""
195
+ if not self.supabase_client or not self.embeddings:
196
+ print("⚠️ Skipping vector store setup - missing dependencies")
197
+ self.vectorstore = None
198
+ return
199
+
200
+ try:
201
+ # Initialize vector store with correct function name
202
+ self.vectorstore = SupabaseVectorStore(
203
+ client=self.supabase_client,
204
+ embedding=self.embeddings,
205
+ table_name="documents",
206
+ query_name="match_documents_langchain" # Use the function we created
207
+ )
208
+ print("✅ Vector store initialized")
209
+
210
+ except Exception as e:
211
+ print(f"⚠️ Vector store setup failed: {e}")
212
+ self.vectorstore = None
213
+
214
+ def setup_tools(self):
215
+ """Setup tools for the agent"""
216
+ self.tools = []
217
+
218
+ # Document Search Tool
219
+ @tool
220
+ def search_documents(query: str) -> str:
221
+ """Search for relevant documents in the knowledge base."""
222
+ if not self.vectorstore:
223
+ return "Vector store not available. Please check database setup."
224
+
225
+ try:
226
+ docs = self.vectorstore.similarity_search(query, k=3)
227
+ if docs:
228
+ results = []
229
+ for i, doc in enumerate(docs, 1):
230
+ content = doc.page_content[:300] + "..." if len(doc.page_content) > 300 else doc.page_content
231
+ results.append(f"Document {i}: {content}")
232
+ return "\n\n".join(results)
233
+ else:
234
+ return "No relevant documents found."
235
+ except Exception as e:
236
+ return f"Error searching documents: {str(e)}"
237
+
238
+ # CSV Analysis Tool
239
+ @tool
240
+ def analyze_csv_data(query: str) -> str:
241
+ """Analyze CSV data and answer questions about it."""
242
+ try:
243
+ # Load the CSV file if it exists
244
+ if os.path.exists("supabase_docs.csv"):
245
+ df = pd.read_csv("supabase_docs.csv")
246
+
247
+ # Basic analysis based on query
248
+ if "rows" in query.lower() or "count" in query.lower():
249
+ return f"The CSV has {len(df)} rows and {len(df.columns)} columns."
250
+ elif "columns" in query.lower():
251
+ return f"Columns: {', '.join(df.columns.tolist())}"
252
+ elif "head" in query.lower() or "first" in query.lower():
253
+ return f"First 5 rows:\n{df.head().to_string()}"
254
+ else:
255
+ return f"CSV loaded with {len(df)} rows. Available columns: {', '.join(df.columns.tolist())}"
256
+ else:
257
+ return "CSV file not found. Please upload supabase_docs.csv"
258
+ except Exception as e:
259
+ return f"Error analyzing CSV: {str(e)}"
260
+
261
+ # General Q&A Tool
262
+ @tool
263
+ def answer_question(question: str) -> str:
264
+ """Answer general questions using the language model."""
265
+ try:
266
+ # Simple prompt for the question
267
+ prompt = f"Question: {question}\nAnswer:"
268
+ response = self.llm.invoke(prompt)
269
+ return response if isinstance(response, str) else str(response)
270
+ except Exception as e:
271
+ return f"I'm unable to process that question right now. Error: {str(e)}"
272
+
273
+ self.tools = [search_documents, analyze_csv_data, answer_question]
274
+ print(f"✅ {len(self.tools)} tools initialized")
275
+
276
+ def setup_agent(self):
277
+ """Setup the agent with React framework"""
278
+ try:
279
+ # Create a simple prompt template
280
+ template = """Answer the following questions as best you can. You have access to the following tools:
281
 
282
+ {tools}
283
 
284
+ Use the following format:
285
 
286
+ Question: the input question you must answer
287
+ Thought: you should always think about what to do
288
+ Action: the action to take, should be one of [{tool_names}]
289
+ Action Input: the input to the action
290
+ Observation: the result of the action
291
+ ... (this Thought/Action/Action Input/Observation can repeat N times)
292
+ Thought: I now know the final answer
293
+ Final Answer: the final answer to the original input question
 
 
294
 
295
+ Begin!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
 
297
+ Question: {input}
298
+ Thought: {agent_scratchpad}"""
 
 
 
 
 
 
 
 
 
 
299
 
300
+ prompt = PromptTemplate.from_template(template)
301
+
302
+ # Create a simple agent using React pattern
303
+ if hasattr(self.llm, 'invoke'):
304
+ agent = create_react_agent(self.llm, self.tools, prompt)
305
+ self.agent_executor = AgentExecutor(
306
+ agent=agent,
307
+ tools=self.tools,
308
+ verbose=True,
309
+ max_iterations=3,
310
+ handle_parsing_errors=True,
311
+ return_intermediate_steps=True
312
+ )
313
+ else:
314
+ # Fallback for simple LLM
315
+ self.agent_executor = self._create_simple_executor()
316
+
317
+ print("✅ Agent initialized successfully")
318
+
319
+ except Exception as e:
320
+ print(f"⚠️ Agent setup failed: {e}")
321
+ self.agent_executor = self._create_simple_executor()
322
+
323
+ def _create_simple_executor(self):
324
+ """Create a simple executor when full agent setup fails"""
325
+ class SimpleExecutor:
326
+ def __init__(self, tools, llm):
327
+ self.tools = {tool.name: tool for tool in tools}
328
+ self.llm = llm
329
+
330
+ def invoke(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
331
+ query = inputs.get("input", "")
332
+
333
+ # Simple routing logic
334
+ if "document" in query.lower() or "search" in query.lower():
335
+ if "search_documents" in self.tools:
336
+ result = self.tools["search_documents"].invoke(query)
337
+ return {"output": result}
338
+
339
+ elif "csv" in query.lower() or "data" in query.lower():
340
+ if "analyze_csv_data" in self.tools:
341
+ result = self.tools["analyze_csv_data"].invoke(query)
342
+ return {"output": result}
343
+
344
+ else:
345
+ if "answer_question" in self.tools:
346
+ result = self.tools["answer_question"].invoke(query)
347
+ return {"output": result}
348
+
349
+ return {"output": f"I can help you with document search, CSV analysis, or general questions. You asked: {query}"}
350
+
351
+ return SimpleExecutor(self.tools, self.llm)
352
+
353
+ def add_documents(self, texts: List[str], metadatas: List[Dict] = None):
354
+ """Add documents to the vector store"""
355
+ if not self.vectorstore:
356
+ print("⚠️ Vector store not available")
357
+ return False
358
+
359
+ try:
360
+ # Split long texts into chunks
361
+ text_splitter = RecursiveCharacterTextSplitter(
362
+ chunk_size=500, # Smaller chunks for better performance
363
+ chunk_overlap=100
364
+ )
365
+
366
+ all_texts = []
367
+ all_metadatas = []
368
+
369
+ for i, text in enumerate(texts):
370
+ chunks = text_splitter.split_text(text)
371
+ all_texts.extend(chunks)
372
+
373
+ # Add metadata for each chunk
374
+ base_metadata = metadatas[i] if metadatas and i < len(metadatas) else {}
375
+ for j, chunk in enumerate(chunks):
376
+ chunk_metadata = base_metadata.copy()
377
+ chunk_metadata.update({"chunk_id": j, "source_doc": i})
378
+ all_metadatas.append(chunk_metadata)
379
+
380
+ # Add to vector store
381
+ ids = self.vectorstore.add_texts(all_texts, all_metadatas)
382
+ print(f"✅ Added {len(ids)} document chunks to vector store")
383
+ return True
384
+
385
+ except Exception as e:
386
+ print(f"❌ Error adding documents: {e}")
387
+ return False
388
+
389
+ def process_query(self, query: str) -> str:
390
+ """Process a user query through the agent"""
391
+ try:
392
+ if self.agent_executor:
393
+ response = self.agent_executor.invoke({"input": query})
394
+ return response.get("output", "Sorry, I couldn't process your query.")
395
+ else:
396
+ return "Agent not properly initialized. Please check your setup."
397
+ except Exception as e:
398
+ return f"Error processing query: {str(e)}"
399
+
400
+ def load_csv_for_analysis(self, file_path: str = "supabase_docs.csv") -> bool:
401
+ """Load CSV data for analysis"""
402
+ try:
403
+ if not os.path.exists(file_path):
404
+ print(f"⚠️ CSV file not found: {file_path}")
405
+ return False
406
+
407
+ df = pd.read_csv(file_path)
408
+ print(f"✅ Loaded CSV with {len(df)} rows and {len(df.columns)} columns")
409
+
410
+ # Optionally add CSV content to vector store for searching
411
+ if self.vectorstore:
412
+ documents = []
413
+ for _, row in df.head(100).iterrows(): # Limit to first 100 rows
414
+ content = " | ".join([f"{col}: {val}" for col, val in row.items() if pd.notna(val)])
415
+ documents.append(content)
416
+
417
+ metadatas = [{"source": "csv_data", "row_id": i} for i in range(len(documents))]
418
+ self.add_documents(documents, metadatas)
419
+ print("✅ CSV data added to vector store for searching")
420
+
421
+ return True
422
+
423
+ except Exception as e:
424
+ print(f"❌ Error loading CSV: {e}")
425
+ return False
426
 
427
# Utility function for direct usage
def create_agent():
    """Create and return a RobotPai agent instance, or None on failure."""
    try:
        return RobotPaiAgent()
    except Exception as e:
        print(f"Failed to create agent: {e}")
        return None
436
 
437
# For backward compatibility
def get_agent():
    """Backward-compatible alias: delegates to create_agent()."""
    return create_agent()