# evoagentx/tools/database_postgresql.py
# PostgreSQL toolkit with a remote-server mode and a local file-based fallback.
import time
import json
from typing import Dict, Any, List, Union, Optional
from pathlib import Path
import psycopg2
import psycopg2.extras
import re
from .database_base import DatabaseBase, DatabaseType, QueryType, DatabaseConnection
from .tool import Tool, Toolkit
from ..core.logging import logger
class PostgreSQLConnection(DatabaseConnection):
    """PostgreSQL-specific connection wrapper built on psycopg2."""

    def __init__(self, connection_string: str, **kwargs):
        super().__init__(connection_string, **kwargs)
        self.conn = None  # live psycopg2 connection, or None when closed

    def connect(self) -> bool:
        """Open a psycopg2 connection; return True on success, False on failure."""
        try:
            self.conn = psycopg2.connect(self.connection_string, **self.connection_params)
        except Exception as e:
            logger.error(f"Failed to connect to PostgreSQL: {str(e)}")
            self._is_connected = False
            return False
        self._is_connected = True
        logger.info("Successfully connected to PostgreSQL")
        return True

    def disconnect(self) -> bool:
        """Close the connection if open; returns False only when closing raised."""
        try:
            if self.conn:
                self.conn.close()
                self.conn = None
            self._is_connected = False
            logger.info("Disconnected from PostgreSQL")
            return True
        except Exception as e:
            logger.error(f"Error disconnecting from PostgreSQL: {str(e)}")
            return False

    def test_connection(self) -> bool:
        """Probe liveness with a trivial SELECT; never raises."""
        if not self.conn:
            return False
        try:
            with self.conn.cursor() as cur:
                cur.execute("SELECT 1;")
            return True
        except Exception:
            return False
class PostgreSQLDatabase(DatabaseBase):
"""
PostgreSQL database implementation with automatic initialization.
Handles remote connections, existing local databases, and new local database creation.
"""
def __init__(self,
             connection_string: str = None,
             database_name: str = None,
             local_path: str = None,
             auto_save: bool = True,
             **kwargs):
    """Set up state, then pick a backend: remote server, existing local
    file-based store, or a brand-new local file-based store."""
    super().__init__(connection_string=connection_string,
                     database_name=database_name,
                     **kwargs)
    self.local_path = Path(local_path) if local_path else None
    self.auto_save = auto_save
    self.connection_params = kwargs
    self.is_local_database = False
    self.conn = None
    self.cursor = None
    self.file_based_mode = False
    self.tables = {}  # in-memory row store used only by file-based mode

    # Dispatch to whichever initializer matches the configuration.
    if self._is_remote_connection():
        self._init_remote_database()
    elif self._is_existing_local_database():
        self._init_existing_local_database()
    else:
        self._init_new_local_database()
def _is_remote_connection(self) -> bool:
    """Return True when the connection string looks like a remote DSN.

    Heuristic: a "user@host" segment or an explicit postgresql:// scheme.
    Fixed to always return a bool — the previous `a and (b or c)` form
    returned None (not False) when the connection string was empty,
    contradicting the annotated return type.
    """
    if not self.connection_string:
        return False
    return "@" in self.connection_string or "postgresql://" in self.connection_string
def _is_existing_local_database(self) -> bool:
    """True when local_path exists and already holds the db_info.json marker."""
    if not self.local_path:
        return False
    if not self.local_path.exists():
        return False
    return (self.local_path / "db_info.json").exists()
def _init_remote_database(self):
    """Initialize remote PostgreSQL connection.

    On failure this now actually falls back to a new local file-based
    database. Previously it only logged "Falling back to local database
    mode" and left the instance uninitialized, unlike
    _init_existing_local_database which does call _init_new_local_database.
    """
    try:
        # Add connection timeout to prevent hanging
        connection_params = self.connection_params.copy()
        connection_params.update({
            'connect_timeout': 5,  # 5 second timeout
            'options': '-c statement_timeout=5000'  # 5 second statement timeout
        })
        self.conn = psycopg2.connect(self.connection_string, **connection_params)
        self.cursor = self.conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        if self.database_name:
            # Isolation level 0 == autocommit, required for catalog probes.
            self.conn.set_isolation_level(0)
            self.cursor.execute("SELECT 1 FROM pg_database WHERE datname = %s", (self.database_name,))
        self._is_initialized = True
        self.is_local_database = False
        self.file_based_mode = False
        logger.info(f"Connected to remote PostgreSQL: {self.database_name}")
    except Exception as e:
        logger.error(f"Failed to connect to remote PostgreSQL: {str(e)}")
        self._is_initialized = False
        # Don't raise, just log the error and continue with local mode
        logger.info("Falling back to local database mode")
        self._init_new_local_database()
def _init_existing_local_database(self):
    """Load an existing file-based database from local_path."""
    try:
        self.database_name = self.database_name or self.local_path.name
        # Hydrate the in-memory table store from the per-table JSON files.
        self._load_tables_from_files()
        self.file_based_mode = True
        self.is_local_database = True
        self._is_initialized = True
        logger.info(f"Loaded existing local file-based database from: {self.local_path}")
    except Exception as e:
        logger.error(f"Failed to load existing local database: {str(e)}")
        self._is_initialized = False
        logger.info("Falling back to new local database mode")
        self._init_new_local_database()
def _init_new_local_database(self):
    """Create a fresh file-based database directory (with marker file)."""
    try:
        self.local_path = self.local_path or Path("./workplace/postgresql_local")
        self.local_path.mkdir(parents=True, exist_ok=True)
        self.database_name = self.database_name or self.local_path.name
        self._create_db_info_file()
        self.file_based_mode = True
        self.is_local_database = True
        self._is_initialized = True
        logger.info(f"Created new local file-based database at: {self.local_path}")
    except Exception as e:
        logger.error(f"Failed to create new local database: {str(e)}")
        self._is_initialized = False
        logger.info("Database initialization failed, but toolkit is still usable")
def _create_db_info_file(self):
    """Write db_info.json, the marker/metadata file identifying this database."""
    payload = {
        "database_name": self.database_name,
        "created_at": time.time(),
        "local_path": str(self.local_path.absolute()),
        "auto_save": self.auto_save,
        "version": "1.0",
        "mode": "file_based",
    }
    try:
        with open(self.local_path / "db_info.json", 'w', encoding='utf-8') as f:
            json.dump(payload, f, indent=2, ensure_ascii=False)
    except Exception as e:
        # Best-effort: a missing marker only affects re-detection on reload.
        logger.warning(f"Failed to create db info file: {str(e)}")
def _load_tables_from_files(self):
    """Populate self.tables from every per-table JSON file under local_path."""
    try:
        for json_file in self.local_path.glob("*.json"):
            if json_file.name == "db_info.json":  # metadata file, not a table
                continue
            table_name = json_file.stem
            with open(json_file, 'r', encoding='utf-8') as f:
                loaded_data = json.load(f)
            # A table must be a list of rows; anything else is reset to empty.
            if isinstance(loaded_data, list):
                self.tables[table_name] = loaded_data
            else:
                logger.warning(f"Table {table_name} file contains non-list data: {type(loaded_data)}, converting to empty list")
                self.tables[table_name] = []
    except Exception as e:
        logger.warning(f"Error loading tables from files: {str(e)}")
def _save_table_to_file(self, table_name: str):
    """Persist one table's rows to <local_path>/<table_name>.json."""
    if table_name not in self.tables:
        return  # nothing to persist
    try:
        target = self.local_path / f"{table_name}.json"
        with open(target, 'w', encoding='utf-8') as f:
            json.dump(self.tables[table_name], f, indent=2, ensure_ascii=False)
    except Exception as e:
        logger.error(f"Error saving table {table_name}: {str(e)}")
def _parse_sql_query(self, sql: str) -> Dict[str, Any]:
    """Parse a small SQL subset into a dict for file-based execution.

    Supports CREATE TABLE, INSERT, SELECT (plain, equi-JOIN, CROSS JOIN),
    UPDATE and DELETE; anything else yields {"type": "UNKNOWN"}.

    Fix: CROSS JOIN is now detected BEFORE the generic JOIN branch.
    Previously `if "JOIN" in upper_sql` also matched CROSS JOIN queries,
    the ON-based regex then failed to match, and the dedicated CROSS JOIN
    branch (an elif) was unreachable — CROSS JOIN always parsed as UNKNOWN.
    """
    sql = sql.strip()
    upper_sql = sql.upper()
    # CREATE TABLE t (col1 type, col2 type, ...)
    if upper_sql.startswith("CREATE TABLE"):
        # NOTE(review): the non-greedy (.*?) stops at the FIRST ')', so
        # column types with parens like VARCHAR(50) truncate the column
        # list — confirm callers stick to simple column definitions.
        match = re.search(r"CREATE TABLE (?:IF NOT EXISTS )?(\w+) *\((.*?)\)", sql, re.IGNORECASE | re.DOTALL)
        if match:
            table = match.group(1).lower()
            col_defs = [c.strip() for c in match.group(2).split(',') if c.strip()]
            col_names = [c.split()[0] for c in col_defs]
            return {"type": "CREATE", "table": table, "columns": col_names}
    # INSERT INTO t (a, b) VALUES (...), (...)
    elif upper_sql.startswith("INSERT"):
        match = re.search(r"INSERT INTO (\w+) *\((.*?)\) *VALUES", sql, re.IGNORECASE | re.DOTALL)
        if match:
            table = match.group(1).lower()
            columns = [c.strip() for c in match.group(2).split(',')]
            values_match = re.search(r"VALUES\s*(.*)", sql, re.IGNORECASE | re.DOTALL)
            if values_match:
                # Each parenthesized group is one row. Values are split on
                # bare commas, so quoted literals containing commas are
                # not supported by this parser.
                value_groups = re.findall(r'\(([^)]+)\)', values_match.group(1))
                all_values = [[v.strip().strip("'\"") for v in group.split(',')]
                              for group in value_groups]
                return {"type": "INSERT", "table": table, "columns": columns, "values": all_values}
    # SELECT, including JOIN variants
    elif upper_sql.startswith("SELECT"):
        # CROSS JOIN must be tested first: "CROSS JOIN" also contains "JOIN".
        if "CROSS JOIN" in upper_sql:
            match = re.search(r"SELECT (.*?) FROM (\w+)(?:\s+(\w+))?\s+CROSS\s+JOIN\s+(\w+)(?:\s+(\w+))?(?: WHERE (.*?))?(?: ORDER BY (.*?))?(?: LIMIT (\d+))?", sql, re.IGNORECASE | re.DOTALL)
            if match:
                return {
                    "type": "SELECT_CROSS_JOIN",
                    "columns": [c.strip() for c in match.group(1).split(',')],
                    "table1": match.group(2).lower(),
                    "alias1": match.group(3),
                    "table2": match.group(4).lower(),
                    "alias2": match.group(5),
                    "where": match.group(6),
                    "order_by": match.group(7),
                    "limit": match.group(8)
                }
        elif "JOIN" in upper_sql:
            # Equi-join with an ON clause; optional table aliases and an
            # optional join-type keyword (INNER/LEFT/...) before JOIN.
            match = re.search(r"SELECT (.*?) FROM (\w+)(?:\s+(\w+))?\s+(?:(\w+)\s+)?JOIN\s+(\w+)(?:\s+(\w+))?\s+ON\s+(.*?)(?: WHERE (.*?))?(?: ORDER BY (.*?))?(?: LIMIT (\d+))?", sql, re.IGNORECASE | re.DOTALL)
            if match:
                return {
                    "type": "SELECT_JOIN",
                    "columns": [c.strip() for c in match.group(1).split(',')],
                    "table1": match.group(2).lower(),
                    "alias1": match.group(3),
                    "join_type": match.group(4) or "INNER",
                    "table2": match.group(5).lower(),
                    "alias2": match.group(6),
                    "join_condition": match.group(7),
                    "where": match.group(8),
                    "order_by": match.group(9),
                    "limit": match.group(10)
                }
        else:
            # Plain single-table SELECT with optional clauses.
            match = re.search(r"SELECT (.*?) FROM (\w+)(?: WHERE (.*?))?(?: GROUP BY (.*?))?(?: ORDER BY (.*?))?(?: LIMIT (\d+))?", sql, re.IGNORECASE | re.DOTALL)
            if match:
                return {
                    "type": "SELECT",
                    "table": match.group(2).lower(),
                    "columns": [c.strip() for c in match.group(1).split(',')],
                    "where": match.group(3),
                    "group_by": match.group(4),
                    "order_by": match.group(5),
                    "limit": match.group(6)
                }
    # UPDATE t SET a = 'x' [WHERE ...]
    elif upper_sql.startswith("UPDATE"):
        match = re.search(r"UPDATE (\w+) SET (.*?)(?: WHERE (.*?))?$", sql, re.IGNORECASE | re.DOTALL)
        if match:
            return {"type": "UPDATE", "table": match.group(1).lower(),
                    "set": match.group(2), "where": match.group(3)}
    # DELETE FROM t [WHERE ...]
    elif upper_sql.startswith("DELETE"):
        match = re.search(r"DELETE FROM (\w+)(?: WHERE (.*?))?", sql, re.IGNORECASE | re.DOTALL)
        if match:
            return {"type": "DELETE", "table": match.group(1).lower(), "where": match.group(2)}
    return {"type": "UNKNOWN"}
def _apply_where_filter(self, rows: List[Dict], where: str) -> List[Dict]:
    """Apply a simple single-condition WHERE filter to rows.

    Supports =, !=, >, <, >= and <= against a bare or quoted literal.
    Fix: the original regex `[=><]+` captured ">=" and "<=" (and never
    matched "!=") but only "=", ">" and "<" were handled, so conditions
    like "age >= 18" silently returned the rows unfiltered.
    Non-dict rows are dropped; unparseable conditions leave rows as-is.
    """
    if not where:
        return rows
    # Ensure rows is a list of dictionaries
    if not isinstance(rows, list):
        logger.warning(f"_apply_where_filter: rows is not a list: {type(rows)}")
        return []
    # Filter out any non-dictionary items
    valid_rows = [r for r in rows if isinstance(r, dict)]
    if len(valid_rows) != len(rows):
        logger.warning(f"_apply_where_filter: filtered out {len(rows) - len(valid_rows)} non-dict rows")
    # Handle simple conditions: col = 'val', col >= val, etc.
    m = re.match(r"(\w+) *([=><!]+) *'?([\w@.\- ]+)'?", where)
    if not m:
        return valid_rows
    col, op, val = m.group(1), m.group(2), m.group(3)
    if op == "=":
        return [r for r in valid_rows if str(r.get(col, "")) == val]
    if op == "!=":
        return [r for r in valid_rows if str(r.get(col, "")) != val]
    comparators = {
        ">": lambda x, y: x > y,
        "<": lambda x, y: x < y,
        ">=": lambda x, y: x >= y,
        "<=": lambda x, y: x <= y,
    }
    if op in comparators:
        try:
            val_num = int(val)
        except ValueError:
            return valid_rows  # non-numeric literal: cannot compare, no filter
        cmp = comparators[op]
        filtered = []
        for r in valid_rows:
            try:
                if cmp(int(r.get(col, 0)), val_num):
                    filtered.append(r)
            except (TypeError, ValueError):
                # Row value not coercible to int: exclude it rather than crash.
                pass
        return filtered
    return valid_rows
def _apply_column_selection(self, rows: List[Dict], columns: List[str]) -> List[Dict]:
    """Project each row onto the requested columns ('*' keeps rows unchanged)."""
    if columns == ['*']:
        return rows
    # Ensure rows is a list of dictionaries
    if not isinstance(rows, list):
        logger.warning(f"_apply_column_selection: rows is not a list: {type(rows)}")
        return []
    # Filter out any non-dictionary items
    valid_rows = [r for r in rows if isinstance(r, dict)]
    if len(valid_rows) != len(rows):
        logger.warning(f"_apply_column_selection: filtered out {len(rows) - len(valid_rows)} non-dict rows")
    # Columns absent from a row are simply omitted from its projection.
    return [{col: row[col] for col in columns if col in row} for row in valid_rows]
def _apply_group_by(self, rows: List[Dict], group_by: str) -> List[Dict]:
    """Group rows by a single column and attach fixed aggregate fields.

    NOTE(review): the aggregate keys (employee_count, avg_salary,
    max_salary) and the "salary" source column are hard-coded, so the
    numbers are only meaningful for employee-style tables — confirm this
    is the intended scope.
    """
    if not group_by:
        return rows
    if not isinstance(rows, list):
        logger.warning(f"_apply_group_by: rows is not a list: {type(rows)}")
        return []
    valid_rows = [r for r in rows if isinstance(r, dict)]
    if len(valid_rows) != len(rows):
        logger.warning(f"_apply_group_by: filtered out {len(rows) - len(valid_rows)} non-dict rows")
    group_col = group_by.strip()
    # Bucket rows by the grouping value; missing values group under "Unknown".
    buckets = {}
    for row in valid_rows:
        buckets.setdefault(row.get(group_col, "Unknown"), []).append(row)
    result = []
    for group_val, group_rows in buckets.items():
        salaries = [float(r.get("salary", 0)) for r in group_rows if r.get("salary") is not None]
        result.append({
            group_col: group_val,
            "employee_count": len(group_rows),
            "avg_salary": sum(salaries) / len(salaries) if salaries else 0,
            "max_salary": max(salaries) if salaries else 0,
        })
    return result
def _execute_join_query(self, parsed: Dict) -> Dict[str, Any]:
    """Execute an equi-JOIN query in file-based mode.

    Performs a nested-loop join of two in-memory tables using the single
    "t1.col = t2.col" condition from the parsed query, then applies any
    WHERE filter. Returns the joined rows as a list on success, or
    {"error": ...} on failure (note: the declared Dict return type does
    not cover the list success case).
    """
    try:
        table1 = parsed["table1"]
        table2 = parsed["table2"]
        columns = parsed["columns"]
        join_condition = parsed["join_condition"]
        where = parsed.get("where")
        # Get table data
        rows1 = self.tables.get(table1, [])
        rows2 = self.tables.get(table2, [])
        # Ensure rows are lists
        if not isinstance(rows1, list):
            logger.warning(f"Table {table1} contains non-list data: {type(rows1)}")
            rows1 = []
        if not isinstance(rows2, list):
            logger.warning(f"Table {table2} contains non-list data: {type(rows2)}")
            rows2 = []
        # Parse join condition: table1.col = table2.col
        join_match = re.match(r"(\w+)\.(\w+)\s*=\s*(\w+)\.(\w+)", join_condition)
        if not join_match:
            return {"error": "Invalid join condition format"}
        # Only the column names are used; the table/alias qualifiers in the
        # ON clause are not validated against table1/table2.
        col1, col2 = join_match.group(2), join_match.group(4)
        # Perform JOIN — O(len(rows1) * len(rows2)) nested loop.
        result_rows = []
        for row1 in rows1:
            # Ensure row1 is a dictionary
            if not isinstance(row1, dict):
                logger.warning(f"Skipping non-dict row1 in JOIN: {type(row1)}")
                continue
            for row2 in rows2:
                # Ensure row2 is a dictionary
                if not isinstance(row2, dict):
                    logger.warning(f"Skipping non-dict row2 in JOIN: {type(row2)}")
                    continue
                # Join keys are compared as strings.
                if str(row1.get(col1, "")) == str(row2.get(col2, "")):
                    # Combine rows
                    combined_row = {}
                    for col in columns:
                        if '.' in col:
                            # Handle aliased columns: table.col
                            table_alias, col_name = col.split('.', 1)
                            if table_alias == parsed.get("alias1") or table_alias == table1:
                                combined_row[col] = row1.get(col_name, "")
                            elif table_alias == parsed.get("alias2") or table_alias == table2:
                                combined_row[col] = row2.get(col_name, "")
                        else:
                            # Handle simple columns (row1 wins when both
                            # tables contain the column).
                            if col in row1:
                                combined_row[col] = row1[col]
                            elif col in row2:
                                combined_row[col] = row2[col]
                    result_rows.append(combined_row)
        # Apply WHERE filter if specified
        if where:
            result_rows = self._apply_where_filter(result_rows, where)
        return result_rows
    except Exception as e:
        logger.error(f"Error executing JOIN query: {str(e)}")
        return {"error": str(e)}
def _execute_cross_join_query(self, parsed: Dict) -> Dict[str, Any]:
    """Execute a CROSS JOIN query in file-based mode.

    Produces the Cartesian product of the two in-memory tables (every
    row1 paired with every row2), then applies any WHERE filter.
    Returns the combined rows as a list on success, or {"error": ...}
    on failure (note: the declared Dict return type does not cover the
    list success case).
    """
    try:
        table1 = parsed["table1"]
        table2 = parsed["table2"]
        columns = parsed["columns"]
        where = parsed.get("where")
        # Get table data
        rows1 = self.tables.get(table1, [])
        rows2 = self.tables.get(table2, [])
        # Ensure rows are lists
        if not isinstance(rows1, list):
            logger.warning(f"Table {table1} contains non-list data: {type(rows1)}")
            rows1 = []
        if not isinstance(rows2, list):
            logger.warning(f"Table {table2} contains non-list data: {type(rows2)}")
            rows2 = []
        # Perform CROSS JOIN — result has len(rows1) * len(rows2) rows.
        result_rows = []
        for row1 in rows1:
            # Ensure row1 is a dictionary
            if not isinstance(row1, dict):
                logger.warning(f"Skipping non-dict row1 in CROSS JOIN: {type(row1)}")
                continue
            for row2 in rows2:
                # Ensure row2 is a dictionary
                if not isinstance(row2, dict):
                    logger.warning(f"Skipping non-dict row2 in CROSS JOIN: {type(row2)}")
                    continue
                # Combine rows
                combined_row = {}
                for col in columns:
                    if '.' in col:
                        # Handle aliased columns: table.col
                        table_alias, col_name = col.split('.', 1)
                        if table_alias == parsed.get("alias1") or table_alias == table1:
                            combined_row[col] = row1.get(col_name, "")
                        elif table_alias == parsed.get("alias2") or table_alias == table2:
                            combined_row[col] = row2.get(col_name, "")
                    else:
                        # Handle simple columns (row1 wins when both tables
                        # contain the column).
                        if col in row1:
                            combined_row[col] = row1[col]
                        elif col in row2:
                            combined_row[col] = row2[col]
                result_rows.append(combined_row)
        # Apply WHERE filter if specified
        if where:
            result_rows = self._apply_where_filter(result_rows, where)
        return result_rows
    except Exception as e:
        logger.error(f"Error executing CROSS JOIN query: {str(e)}")
        return {"error": str(e)}
def _get_database_type(self) -> DatabaseType:
    """Identify this backend as PostgreSQL for the DatabaseBase machinery."""
    return DatabaseType.POSTGRESQL
def connect(self) -> bool:
    """Report connection status; the actual connection is made in __init__."""
    return self._is_initialized
def disconnect(self) -> bool:
    """Close the remote connection (no-op in file-based mode); True on success."""
    try:
        if self.conn:
            self.conn.close()
            self.conn = None
            self.cursor = None
        self._is_initialized = False
        logger.info("Disconnected from PostgreSQL")
        return True
    except Exception as e:
        logger.error(f"Error disconnecting: {str(e)}")
        return False
def test_connection(self) -> bool:
    """Cheap liveness probe; file-based mode is 'alive' whenever initialized."""
    if self.file_based_mode:
        return self._is_initialized
    if self.conn is None:
        return False
    try:
        with self.conn.cursor() as cur:
            cur.execute("SELECT 1;")
        return True
    except Exception:
        return False
def execute_query(self, query: Union[str, Dict, List], query_type: QueryType = None, **kwargs) -> Dict[str, Any]:
    """Execute a query and return a formatted result dict.

    Accepts a raw SQL string, a {"sql": ..., "params": ...} dict
    (parameterized through psycopg2), or a list mixing both forms.
    File-based mode delegates to _execute_file_based_query; remote mode
    commits on success and rolls back on error. For a list of queries,
    all statements run on one cursor but only the LAST statement's
    result is returned.
    """
    if not self._is_initialized:
        return self.format_error_result("Database not initialized")
    # For file-based mode, keep existing logic
    if self.file_based_mode:
        return self._execute_file_based_query(query, query_type)
    # For remote PostgreSQL, use direct psycopg2 execution
    if self.conn is None:
        return self.format_error_result("PostgreSQL server not available")
    start_time = time.time()
    try:
        # RealDictCursor yields each row as a dict keyed by column name.
        with self.conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            # Handle different query formats
            if isinstance(query, str):
                # Direct SQL string - execute as-is
                cur.execute(query)
            elif isinstance(query, dict):
                # Dict with SQL and params - use parameterized query
                sql = query.get("sql")
                params = query.get("params", None)
                if params:
                    cur.execute(sql, params)
                else:
                    cur.execute(sql)
            elif isinstance(query, list):
                # List of queries - execute each one
                for q in query:
                    if isinstance(q, str):
                        cur.execute(q)
                    elif isinstance(q, dict):
                        sql = q.get("sql")
                        params = q.get("params", None)
                        if params:
                            cur.execute(sql, params)
                        else:
                            cur.execute(sql)
            else:
                return self.format_error_result("Unsupported query format", query_type)
            # cur.description is only set for row-returning statements.
            if cur.description:
                result = cur.fetchall()
            else:
                result = {"rowcount": cur.rowcount}
            self.conn.commit()
        execution_time = time.time() - start_time
        return self.format_query_result(result, query_type or QueryType.SELECT, execution_time=execution_time)
    except Exception as e:
        execution_time = time.time() - start_time
        logger.error(f"Error executing PostgreSQL query: {str(e)}")
        # Rollback on error
        try:
            if self.conn:
                self.conn.rollback()
        except Exception as rollback_error:
            logger.warning(f"Error during rollback: {str(rollback_error)}")
        return self.format_error_result(str(e), query_type, execution_time=execution_time)
def _execute_file_based_query(self, query: Union[str, Dict, List], query_type: QueryType = None) -> Dict[str, Any]:
    """Execute query in file-based mode.

    Only string SQL is supported here; the statement is parsed by
    _parse_sql_query and interpreted against the in-memory ``self.tables``
    store. Mutating statements persist the affected table to JSON when
    ``auto_save`` is enabled.
    """
    start_time = time.time()
    try:
        if isinstance(query, str):
            parsed = self._parse_sql_query(query)
            query_type = query_type or QueryType.SELECT
            # Ensure parsed is a dictionary with a type key
            if not isinstance(parsed, dict) or "type" not in parsed:
                logger.error(f"_execute_file_based_query: parsed is not a valid dict: {parsed}")
                return self.format_error_result(f"Failed to parse SQL query: {query}", query_type)
            logger.debug(f"Executing {parsed['type']} query: {parsed}")
            if parsed["type"] == "CREATE":
                table_name = parsed["table"]
                columns = parsed.get("columns", ["id"])
                if table_name not in self.tables:
                    self.tables[table_name] = []
                # Ensure table is always a list
                if not isinstance(self.tables[table_name], list):
                    logger.warning(f"Reinitializing table {table_name} as list (was {type(self.tables[table_name])})")
                    self.tables[table_name] = []
                # Store schema as a hidden key. NOTE(review): it lives beside
                # real tables in self.tables, so it also appears in
                # list_collections() output.
                self.tables[f"__schema__{table_name}"] = columns
                if self.auto_save:
                    self._save_table_to_file(table_name)
                result = {"rowcount": 0}
            elif parsed["type"] == "INSERT":
                table_name = parsed["table"]
                columns = parsed["columns"]
                all_values = parsed["values"]
                if table_name not in self.tables:
                    self.tables[table_name] = []
                # Ensure table is always a list
                if not isinstance(self.tables[table_name], list):
                    logger.warning(f"Reinitializing table {table_name} as list (was {type(self.tables[table_name])})")
                    self.tables[table_name] = []
                valid_rows = 0
                # Insert all rows
                for values in all_values:
                    # Skip invalid rows (should have same number of values as columns)
                    if len(values) != len(columns):
                        logger.warning(f"Skipping invalid row: {values} (expected {len(columns)} values, got {len(values)})")
                        continue
                    # Ensure values is a list
                    if not isinstance(values, list):
                        logger.warning(f"Skipping non-list values: {type(values)}")
                        continue
                    row = {col: val for col, val in zip(columns, values)}
                    # NOTE(review): ids derive from the current row count, so
                    # they can collide with surviving rows after a DELETE.
                    row["id"] = len(self.tables[table_name]) + 1
                    self.tables[table_name].append(row)
                    valid_rows += 1
                if self.auto_save:
                    self._save_table_to_file(table_name)
                result = {"rowcount": valid_rows}
            elif parsed["type"] == "SELECT":
                table_name = parsed["table"]
                columns = parsed["columns"]
                where = parsed.get("where")
                group_by = parsed.get("group_by")
                rows = self.tables.get(table_name, [])
                # Ensure rows is always a list
                if not isinstance(rows, list):
                    logger.warning(f"Table {table_name} contains non-list data: {type(rows)}")
                    rows = []
                # Debug logging
                logger.debug(f"SELECT query: table={table_name}, columns={columns}, where={where}, group_by={group_by}")
                logger.debug(f"Rows from table: {type(rows)}, length={len(rows) if isinstance(rows, list) else 'N/A'}")
                if isinstance(rows, list) and rows:
                    logger.debug(f"First row type: {type(rows[0])}, content: {rows[0]}")
                # Apply WHERE filter
                if where:
                    rows = self._apply_where_filter(rows, where)
                # Handle basic aggregation
                if group_by:
                    # NOTE(review): group-by results are returned bare while the
                    # plain path wraps rows in {"data": ...} — confirm consumers
                    # handle both shapes.
                    result = self._apply_group_by(rows, group_by)
                else:
                    # Apply column selection
                    result = {"data": self._apply_column_selection(rows, columns)}
            elif parsed["type"] == "SELECT_JOIN":
                # Handle JOIN queries
                logger.debug(f"Executing JOIN query: {parsed}")
                join_result = self._execute_join_query(parsed)
                if isinstance(join_result, dict) and "error" in join_result:
                    result = {"error": join_result["error"]}
                else:
                    result = {"data": join_result}
            elif parsed["type"] == "SELECT_CROSS_JOIN":
                # Handle CROSS JOIN queries
                logger.debug(f"Executing CROSS JOIN query: {parsed}")
                cross_join_result = self._execute_cross_join_query(parsed)
                if isinstance(cross_join_result, dict) and "error" in cross_join_result:
                    result = {"error": cross_join_result["error"]}
                else:
                    result = {"data": cross_join_result}
            elif parsed["type"] == "UPDATE":
                table_name = parsed["table"]
                set_clause = parsed["set"]
                where = parsed.get("where")
                rows = self.tables.get(table_name, [])
                # Ensure rows is always a list
                if not isinstance(rows, list):
                    logger.warning(f"Table {table_name} contains non-list data: {type(rows)}")
                    rows = []
                # Parse set_clause: col1 = 'val1', col2 = 'val2'
                updates = dict(re.findall(r"(\w+) *= *'?([\w@.\- ]+)'?", set_clause))
                count = 0
                for r in rows:
                    # Ensure r is a dictionary
                    if not isinstance(r, dict):
                        logger.warning(f"Skipping non-dict row in UPDATE: {type(r)}")
                        continue
                    # Rows match by default; a parseable WHERE can veto them.
                    match = True
                    if where:
                        m = re.match(r"(\w+) *([=><]+) *'?([\w@.\- ]+)'?", where)
                        if m:
                            col, op, val = m.group(1), m.group(2), m.group(3)
                            if op == "=" and str(r.get(col, "")) != val:
                                match = False
                            elif op == ">" and int(r.get(col, 0)) <= int(val):
                                match = False
                            elif op == "<" and int(r.get(col, 0)) >= int(val):
                                match = False
                    if match:
                        r.update(updates)
                        count += 1
                if self.auto_save:
                    self._save_table_to_file(table_name)
                result = {"rowcount": count}
            elif parsed["type"] == "DELETE":
                table_name = parsed["table"]
                where = parsed.get("where")
                rows = self.tables.get(table_name, [])
                # Ensure rows is always a list
                if not isinstance(rows, list):
                    logger.warning(f"Table {table_name} contains non-list data: {type(rows)}")
                    rows = []
                if where:
                    m = re.match(r"(\w+) *([=><]+) *'?([\w@.\- ]+)'?", where)
                    if m:
                        col, op, val = m.group(1), m.group(2), m.group(3)
                        if op == "=":
                            new_rows = [r for r in rows if isinstance(r, dict) and str(r.get(col, "")) != val]
                        elif op == ">":
                            try:
                                val_num = int(val)
                                new_rows = [r for r in rows if isinstance(r, dict) and int(r.get(col, 0)) <= val_num]
                            except ValueError:
                                new_rows = rows
                        else:
                            # NOTE(review): '<' (and >=/<=) conditions are not
                            # implemented for DELETE; they delete nothing.
                            new_rows = rows
                        deleted_count = len(rows) - len(new_rows)
                        self.tables[table_name] = new_rows
                    else:
                        # Unparseable WHERE: delete nothing.
                        deleted_count = 0
                else:
                    # No WHERE clause: delete every row in the table.
                    deleted_count = len(rows)
                    self.tables[table_name] = []
                if self.auto_save:
                    self._save_table_to_file(table_name)
                result = {"rowcount": deleted_count}
            else:
                return self.format_error_result("Unsupported query type in file-based mode", query_type)
            execution_time = time.time() - start_time
            return self.format_query_result(result, query_type, execution_time=execution_time)
        else:
            return self.format_error_result("Unsupported query format in file-based mode", query_type)
    except Exception as e:
        execution_time = time.time() - start_time
        logger.error(f"Error executing file-based query: {str(e)}")
        logger.error(f"Query that caused error: {query}")
        logger.error(f"Query type: {query_type}")
        import traceback
        logger.error(f"Traceback: {traceback.format_exc()}")
        return self.format_error_result(str(e), query_type, execution_time=execution_time)
def get_database_info(self) -> Dict[str, Any]:
    """Return summary info (name, user, table count, connection) for the DB."""
    try:
        if not self._is_initialized:
            return self.format_error_result("Database not initialized")
        if self.file_based_mode:
            info = {
                "database": self.database_name,
                "user": "file_based",  # no real DB user exists in file mode
                "table_count": len(self.tables),
                "connection_string": "file_based",
                "is_connected": True,
                "mode": "file_based"
            }
        else:
            if self.conn is None:
                return self.format_error_result("PostgreSQL server not available")
            with self.conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
                cur.execute("SELECT current_database() as database, current_user as user")
                db_info = cur.fetchone()
                cur.execute("SELECT COUNT(*) as table_count FROM information_schema.tables WHERE table_schema = 'public'")
                table_count = cur.fetchone()["table_count"]
                info = {
                    "database": db_info["database"],
                    "user": db_info["user"],
                    "table_count": table_count,
                    "connection_string": self.connection_string,
                    "is_connected": self._is_initialized
                }
        return self.format_query_result(info, QueryType.SELECT)
    except Exception as e:
        return self.format_error_result(str(e))
def list_collections(self) -> List[str]:
    """Return table names: in-memory keys in file mode, public-schema
    tables otherwise. Never raises; errors yield an empty list.

    NOTE(review): in file mode this also includes the internal
    "__schema__<table>" keys stored by CREATE TABLE handling.
    """
    try:
        if self.file_based_mode:
            return list(self.tables.keys())
        if not self._is_initialized or self.conn is None:
            return []
        with self.conn.cursor() as cur:
            cur.execute("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'")
            return [row[0] for row in cur.fetchall()]
    except Exception as e:
        logger.error(f"Error listing tables: {str(e)}")
        return []
def get_collection_info(self, collection_name: str) -> Dict[str, Any]:
    """Return row count and column info for one table; errors come back
    via format_error_result rather than exceptions."""
    try:
        if not self._is_initialized:
            return self.format_error_result("Database not initialized")
        if self.file_based_mode:
            if collection_name in self.tables:
                row_count = len(self.tables[collection_name])
                info = {
                    "table_name": collection_name,
                    "row_count": row_count,
                    "columns": ["id"]  # file mode does not track real column schemas
                }
            else:
                return self.format_error_result(f"Table {collection_name} not found")
        else:
            if self.conn is None:
                return self.format_error_result("PostgreSQL server not available")
            with self.conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
                # SECURITY(review): collection_name is interpolated directly
                # into the SQL (identifiers cannot be bound as parameters);
                # callers must supply trusted table names only.
                cur.execute(f"SELECT COUNT(*) as row_count FROM {collection_name}")
                row_count = cur.fetchone()["row_count"]
                cur.execute("SELECT column_name, data_type FROM information_schema.columns WHERE table_name = %s", (collection_name,))
                columns = cur.fetchall()
                info = {
                    "table_name": collection_name,
                    "row_count": row_count,
                    "columns": columns
                }
        return self.format_query_result(info, QueryType.SELECT)
    except Exception as e:
        return self.format_error_result(str(e))
def get_schema(self, collection_name: str = None) -> Dict[str, Any]:
    """Return the schema of one table, or of every table when name is None.

    Fix: the remote "all tables" path iterated `row[0]` over rows fetched
    from a RealDictCursor. RealDictCursor returns each row as a dict keyed
    by column name, so `row[0]` raised KeyError(0) and every call came
    back as an error result. It now reads `row["table_name"]`.
    """
    try:
        if not self._is_initialized:
            return self.format_error_result("Database not initialized")
        if self.file_based_mode:
            # File mode does not persist real column types; every table is
            # reported with the synthetic {"id": "integer"} schema.
            if collection_name:
                if collection_name in self.tables:
                    schema = {"id": "integer"}
                    return self.format_query_result({"table_name": collection_name, "schema": schema}, QueryType.SELECT)
                else:
                    return self.format_error_result(f"Table {collection_name} not found")
            else:
                schemas = {table_name: {"id": "integer"} for table_name in self.tables}
                return self.format_query_result({"database_name": self.database_name, "schemas": schemas}, QueryType.SELECT)
        else:
            if self.conn is None:
                return self.format_error_result("PostgreSQL server not available")
            with self.conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
                if collection_name:
                    cur.execute("SELECT column_name, data_type FROM information_schema.columns WHERE table_name = %s", (collection_name,))
                    columns = cur.fetchall()
                    schema = {col["column_name"]: col["data_type"] for col in columns}
                    return self.format_query_result({"table_name": collection_name, "schema": schema}, QueryType.SELECT)
                else:
                    cur.execute("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'")
                    # RealDictCursor rows are dicts, not tuples.
                    tables = [row["table_name"] for row in cur.fetchall()]
                    schemas = {}
                    # One catalog query per table; fine for small schemas.
                    for table in tables:
                        cur.execute("SELECT column_name, data_type FROM information_schema.columns WHERE table_name = %s", (table,))
                        columns = cur.fetchall()
                        schemas[table] = {col["column_name"]: col["data_type"] for col in columns}
                    return self.format_query_result({"database_name": self.database_name, "schemas": schemas}, QueryType.SELECT)
    except Exception as e:
        return self.format_error_result(str(e))
def get_supported_query_types(self) -> List[QueryType]:
    """Full SQL verb coverage: DML (select/insert/update/delete) plus DDL."""
    return [
        QueryType.SELECT, QueryType.INSERT, QueryType.UPDATE, QueryType.DELETE,
        QueryType.CREATE, QueryType.DROP, QueryType.ALTER, QueryType.INDEX,
    ]
def get_capabilities(self) -> Dict[str, Any]:
    """Merge PostgreSQL-specific capability flags into the base set.

    Transactions and indexing are only claimed for a real server;
    the file-based store is schema-flexible instead.
    """
    file_mode = self.file_based_mode
    caps = super().get_capabilities()
    caps.update({
        "supports_sql": True,
        "supports_transactions": not file_mode,
        "supports_indexing": not file_mode,
        "schema_flexible": file_mode,
        "file_based_mode": file_mode,
    })
    return caps
# Tool classes
class PostgreSQLExecuteTool(Tool):
    """Tool wrapper that forwards raw SQL to a PostgreSQLDatabase."""
    name: str = "postgresql_execute"
    description: str = "Execute arbitrary SQL queries on PostgreSQL."
    inputs: Dict[str, Dict[str, str]] = {
        "query": {"type": "string", "description": "SQL query to execute (can be SELECT, INSERT, UPDATE, DELETE, etc.)"},
        "query_type": {"type": "string", "description": "Type of query (select, insert, update, delete, create, drop, alter, index) - auto-detected if not provided"}
    }
    required: Optional[List[str]] = ["query"]

    def __init__(self, database: PostgreSQLDatabase = None):
        super().__init__()
        self.database = database

    def __call__(self, query: str, query_type: str = None) -> Dict[str, Any]:
        """Run `query`; `query_type` (if given) must be a valid QueryType value."""
        if not self.database:
            return {"success": False, "error": "PostgreSQL database not initialized", "data": None}
        try:
            # The SQL string is passed straight through; psycopg2 (or the
            # file-based interpreter) does the actual handling.
            parsed_type = None
            if query_type:
                try:
                    parsed_type = QueryType(query_type.lower())
                except ValueError:
                    return {"success": False, "error": f"Invalid query type: {query_type}", "data": None}
            return self.database.execute_query(query=query, query_type=parsed_type)
        except Exception as e:
            logger.error(f"Error in postgresql_execute tool: {str(e)}")
            return {"success": False, "error": str(e), "data": None}
class PostgreSQLFindTool(Tool):
    """SELECT rows from a table, assembling the statement from simple clause inputs."""

    name: str = "postgresql_find"
    description: str = "Find (SELECT) rows from a PostgreSQL table."
    inputs: Dict[str, Dict[str, str]] = {
        "table_name": {"type": "string", "description": "Table name to query"},
        "where": {"type": "string", "description": "WHERE clause (optional, e.g., 'age > 18')"},
        "columns": {"type": "string", "description": "Comma-separated columns to select (default '*')"},
        "limit": {"type": "integer", "description": "Maximum number of rows to return (optional)"},
        "offset": {"type": "integer", "description": "Number of rows to skip (optional)"},
        "sort": {"type": "string", "description": "ORDER BY clause (optional, e.g., 'age ASC')"}
    }
    required: Optional[List[str]] = ["table_name"]

    def __init__(self, database: PostgreSQLDatabase = None):
        super().__init__()
        self.database = database

    def __call__(self, table_name: str, where: str = None, columns: str = "*", limit: int = None, offset: int = None, sort: str = None) -> Dict[str, Any]:
        """Build and execute a SELECT from the given clause fragments."""
        try:
            if not self.database:
                return {"success": False, "error": "PostgreSQL database not initialized", "data": None}
            # NOTE(security): clause fragments are interpolated verbatim into
            # the SQL string — callers must not feed untrusted input here.
            parts = [f"SELECT {columns} FROM {table_name}"]
            if where:
                parts.append(f"WHERE {where}")
            if sort:
                parts.append(f"ORDER BY {sort}")
            if limit is not None:
                parts.append(f"LIMIT {limit}")
            if offset is not None:
                parts.append(f"OFFSET {offset}")
            return self.database.execute_query(" ".join(parts), QueryType.SELECT)
        except Exception as e:
            logger.error(f"Error in postgresql_find tool: {str(e)}")
            return {"success": False, "error": str(e), "data": None}
class PostgreSQLUpdateTool(Tool):
    """UPDATE rows in a table from SET/WHERE clause fragments."""

    name: str = "postgresql_update"
    description: str = "Update rows in a PostgreSQL table."
    inputs: Dict[str, Dict[str, str]] = {
        "table_name": {"type": "string", "description": "Table name to update"},
        "set": {"type": "string", "description": "SET clause (e.g., 'status = \'active\'')"},
        "where": {"type": "string", "description": "WHERE clause (optional)"}
    }
    required: Optional[List[str]] = ["table_name", "set"]

    def __init__(self, database: PostgreSQLDatabase = None):
        super().__init__()
        self.database = database

    # The parameter is named `set` (shadowing the builtin) because that is
    # the key declared in `inputs` and callers pass it by keyword.
    def __call__(self, table_name: str, set: str, where: str = None) -> Dict[str, Any]:
        """Build and execute an UPDATE; clauses are interpolated verbatim."""
        try:
            if not self.database:
                return {"success": False, "error": "PostgreSQL database not initialized", "data": None}
            statement = f"UPDATE {table_name} SET {set}"
            if where:
                statement = f"{statement} WHERE {where}"
            return self.database.execute_query(statement, QueryType.UPDATE)
        except Exception as e:
            logger.error(f"Error in postgresql_update tool: {str(e)}")
            return {"success": False, "error": str(e), "data": None}
class PostgreSQLCreateTool(Tool):
    """Execute a CREATE statement (table, index, or other object)."""

    name: str = "postgresql_create"
    description: str = "Create a table or other object in PostgreSQL."
    inputs: Dict[str, Dict[str, str]] = {
        "query": {"type": "string", "description": "CREATE statement (e.g., CREATE TABLE ...)"}
    }
    required: Optional[List[str]] = ["query"]

    def __init__(self, database: PostgreSQLDatabase = None):
        super().__init__()
        self.database = database

    def __call__(self, query: str) -> Dict[str, Any]:
        """Pass *query* through to the database layer tagged as a CREATE."""
        try:
            if not self.database:
                return {"success": False, "error": "PostgreSQL database not initialized", "data": None}
            return self.database.execute_query(query, QueryType.CREATE)
        except Exception as e:
            logger.error(f"Error in postgresql_create tool: {str(e)}")
            return {"success": False, "error": str(e), "data": None}
class PostgreSQLDeleteTool(Tool):
    """DELETE rows from a table, with an optional WHERE clause fragment."""

    name: str = "postgresql_delete"
    description: str = "Delete rows from a PostgreSQL table."
    inputs: Dict[str, Dict[str, str]] = {
        "table_name": {"type": "string", "description": "Table name to delete from"},
        "where": {"type": "string", "description": "WHERE clause (optional)"}
    }
    required: Optional[List[str]] = ["table_name"]

    def __init__(self, database: PostgreSQLDatabase = None):
        super().__init__()
        self.database = database

    def __call__(self, table_name: str, where: str = None) -> Dict[str, Any]:
        """Build and execute a DELETE; without *where* every row is removed."""
        try:
            if not self.database:
                return {"success": False, "error": "PostgreSQL database not initialized", "data": None}
            statement = f"DELETE FROM {table_name}"
            if where:
                statement = f"{statement} WHERE {where}"
            return self.database.execute_query(statement, QueryType.DELETE)
        except Exception as e:
            logger.error(f"Error in postgresql_delete tool: {str(e)}")
            return {"success": False, "error": str(e), "data": None}
class PostgreSQLInfoTool(Tool):
    """Report database-, table-, schema-, or capability-level metadata."""

    name: str = "postgresql_info"
    description: str = "Get PostgreSQL database and table information."
    inputs: Dict[str, Dict[str, str]] = {
        "info_type": {"type": "string", "description": "Type of information (database, tables, table, schema, capabilities)"},
        "table_name": {"type": "string", "description": "Table name for table-specific info (optional)"}
    }
    required: Optional[List[str]] = []

    def __init__(self, database: PostgreSQLDatabase = None):
        super().__init__()
        self.database = database

    def __call__(self, info_type: str = "database", table_name: str = None) -> Dict[str, Any]:
        """Dispatch on *info_type* (case-insensitive) and return a result dict.

        Supported values: "database", "tables", "table" (requires
        table_name), "schema", "capabilities". Any other value — or an
        exception from the database layer — yields a
        {"success": False, ...} dict.
        """
        try:
            if not self.database:
                return {"success": False, "error": "PostgreSQL database not initialized", "data": None}
            info_type = info_type.lower()
            if info_type == "database":
                return self.database.get_database_info()
            if info_type == "tables":
                tables = self.database.list_collections()
                return {"success": True, "data": tables, "table_count": len(tables)}
            if info_type == "table":
                # Fix: a missing table_name previously fell through to the
                # generic "Invalid info type: table" error, which was
                # misleading — report the real problem instead.
                if not table_name:
                    return {"success": False, "error": "table_name is required when info_type is 'table'", "data": None}
                return self.database.get_collection_info(table_name)
            if info_type == "schema":
                return self.database.get_schema(table_name)
            if info_type == "capabilities":
                return {"success": True, "data": self.database.get_capabilities()}
            return {"success": False, "error": f"Invalid info type: {info_type}", "data": None}
        except Exception as e:
            logger.error(f"Error in postgresql_info tool: {str(e)}")
            return {"success": False, "error": str(e), "data": None}
class PostgreSQLToolkit(Toolkit):
    """Toolkit bundling the PostgreSQL tools around one shared database handle."""

    def __init__(self,
                 name: str = "PostgreSQLToolkit",
                 connection_string: str = None,
                 database_name: str = None,
                 local_path: str = None,
                 auto_save: bool = True,
                 **kwargs):
        # One database instance backs every tool in the kit.
        database = PostgreSQLDatabase(
            connection_string=connection_string,
            database_name=database_name,
            local_path=local_path,
            auto_save=auto_save,
            **kwargs
        )
        tool_classes = (
            PostgreSQLExecuteTool,
            PostgreSQLFindTool,
            PostgreSQLUpdateTool,
            PostgreSQLCreateTool,
            PostgreSQLDeleteTool,
            PostgreSQLInfoTool,
        )
        super().__init__(name=name, tools=[cls(database=database) for cls in tool_classes])
        self.database = database
        self.connection_string = connection_string
        self.database_name = database_name
        self.local_path = local_path
        self.auto_save = auto_save
        # Make sure the connection is released when the interpreter exits.
        import atexit
        atexit.register(self._cleanup)

    def _cleanup(self):
        """Best-effort disconnect at interpreter shutdown; never raises."""
        try:
            if self.database:
                self.database.disconnect()
                logger.info("Disconnected from PostgreSQL database")
        except Exception as e:
            logger.warning(f"Error during cleanup: {str(e)}")

    def get_capabilities(self) -> Dict[str, Any]:
        """Capabilities of the underlying database plus toolkit-local settings."""
        if not self.database:
            return {"error": "PostgreSQL database not initialized"}
        capabilities = self.database.get_capabilities()
        capabilities["is_local_database"] = self.database.is_local_database
        capabilities["local_path"] = str(self.database.local_path) if self.database.local_path else None
        capabilities["auto_save"] = self.database.auto_save
        return capabilities

    def connect(self) -> bool:
        """Open the shared connection; False when no database is configured."""
        if not self.database:
            return False
        return self.database.connect()

    def disconnect(self) -> bool:
        """Close the shared connection; False when no database is configured."""
        if not self.database:
            return False
        return self.database.disconnect()

    def test_connection(self) -> bool:
        """Probe the shared connection; False when no database is configured."""
        if not self.database:
            return False
        return self.database.test_connection()

    def get_database(self) -> PostgreSQLDatabase:
        """Expose the shared PostgreSQLDatabase instance."""
        return self.database

    def get_local_info(self) -> Dict[str, Any]:
        """Summarize local-mode settings and connection parameters."""
        if not self.database:
            return {"error": "Database not initialized"}
        return {
            "is_local_database": self.database.is_local_database,
            "local_path": str(self.database.local_path) if self.database.local_path else None,
            "auto_save": self.database.auto_save,
            "database_name": self.database_name,
            "connection_string": self.connection_string
        }