Spaces:
Sleeping
Sleeping
| """ | |
| Continuous Memory module for the Memory & Learning Layer. | |
| This module implements both short-term and long-term memory capabilities, | |
| with vector-based storage and retrieval for continuous learning. | |
| """ | |
import asyncio
import json
import logging
import os
import time
import uuid
from typing import Dict, List, Any, Optional, Union, Tuple
from urllib.parse import urlsplit

import numpy as np
# Module-wide logging: root config at INFO, one named logger for this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class ContinuousMemory:
    """
    Memory system that lets the agent store and learn from experiences.

    Combines a recency-based short-term memory (recent actions and page
    states) with a long-term vector store used for similarity retrieval
    over past task experiences.
    """

    def __init__(self):
        """Set up empty short-term buffers and read vector-DB/embedding config."""
        # Short-term memory: a bounded action history plus a URL -> state map.
        self.recent_actions = []
        self.recent_states = {}
        self.max_recent_actions = 100
        self.max_recent_states = 20

        # Long-term memory: backend chosen via environment; the actual
        # connection happens later in initialize().
        self.vector_db_client = None
        self.vector_db_collection = None
        self.vector_db_type = os.environ.get("VECTOR_DB_TYPE", "chromadb")
        self.vector_db_url = os.environ.get("VECTOR_DB_URL", "")
        self.vector_db_dimensions = 1536  # Default for OpenAI embeddings

        # Embedding client is also created in initialize().
        self.embedding_client = None
        self.embedding_model = os.environ.get("EMBEDDING_MODEL", "text-embedding-3-large")

        logger.info("ContinuousMemory instance created")
| async def initialize(self): | |
| """Initialize vector database and embedding clients.""" | |
| try: | |
| # Initialize embedding client | |
| import openai | |
| self.embedding_client = openai.AsyncClient( | |
| api_key=os.environ.get("OPENAI_API_KEY") | |
| ) | |
| # Initialize vector database based on configuration | |
| if self.vector_db_type == "chromadb": | |
| await self._initialize_chromadb() | |
| elif self.vector_db_type == "pinecone": | |
| await self._initialize_pinecone() | |
| elif self.vector_db_type == "weaviate": | |
| await self._initialize_weaviate() | |
| else: | |
| # Default to in-memory vector store if configured type is not available | |
| await self._initialize_in_memory_vector_store() | |
| logger.info(f"ContinuousMemory initialized with {self.vector_db_type} vector database") | |
| return True | |
| except Exception as e: | |
| logger.error(f"Error initializing continuous memory: {str(e)}") | |
| # Fall back to in-memory vector store | |
| await self._initialize_in_memory_vector_store() | |
| return False | |
| async def _initialize_chromadb(self): | |
| """Initialize ChromaDB vector database.""" | |
| try: | |
| import chromadb | |
| from chromadb.config import Settings | |
| # Connect to ChromaDB | |
| self.vector_db_client = chromadb.Client( | |
| Settings( | |
| chroma_api_impl="rest", | |
| chroma_server_host=self.vector_db_url.split("://")[1].split(":")[0] if "://" in self.vector_db_url else "localhost", | |
| chroma_server_http_port=int(self.vector_db_url.split(":")[-1]) if ":" in self.vector_db_url else 8000 | |
| ) | |
| ) | |
| # Create or get collection | |
| self.vector_db_collection = self.vector_db_client.get_or_create_collection( | |
| name="agent_memory", | |
| metadata={"hnsw:space": "cosine"} | |
| ) | |
| logger.info("ChromaDB vector database initialized") | |
| except Exception as e: | |
| logger.error(f"Error initializing ChromaDB: {str(e)}") | |
| # Fall back to in-memory vector store | |
| await self._initialize_in_memory_vector_store() | |
| async def _initialize_pinecone(self): | |
| """Initialize Pinecone vector database.""" | |
| try: | |
| import pinecone | |
| # Initialize connection | |
| pinecone.init( | |
| api_key=os.environ.get("PINECONE_API_KEY", ""), | |
| environment=os.environ.get("PINECONE_ENVIRONMENT", "us-west1-gcp") | |
| ) | |
| # Get or create index | |
| index_name = "agent-memory" | |
| if index_name not in pinecone.list_indexes(): | |
| pinecone.create_index( | |
| name=index_name, | |
| dimension=self.vector_db_dimensions, | |
| metric="cosine" | |
| ) | |
| self.vector_db_client = pinecone | |
| self.vector_db_collection = pinecone.Index(index_name) | |
| logger.info("Pinecone vector database initialized") | |
| except Exception as e: | |
| logger.error(f"Error initializing Pinecone: {str(e)}") | |
| # Fall back to in-memory vector store | |
| await self._initialize_in_memory_vector_store() | |
| async def _initialize_weaviate(self): | |
| """Initialize Weaviate vector database.""" | |
| try: | |
| import weaviate | |
| from weaviate.auth import AuthApiKey | |
| # Connect to Weaviate | |
| auth_config = None | |
| weaviate_key = os.environ.get("WEAVIATE_API_KEY") | |
| if weaviate_key: | |
| auth_config = AuthApiKey(api_key=weaviate_key) | |
| client = weaviate.Client( | |
| url=self.vector_db_url or "http://localhost:8080", | |
| auth_client_secret=auth_config, | |
| ) | |
| # Create schema if it doesn't exist | |
| if not client.schema.exists("AgentMemory"): | |
| class_obj = { | |
| "class": "AgentMemory", | |
| "description": "Memory entries for the agent's experiences", | |
| "vectorizer": "text2vec-openai", | |
| "moduleConfig": { | |
| "text2vec-openai": { | |
| "model": self.embedding_model, | |
| "type": "text" | |
| } | |
| }, | |
| "properties": [ | |
| { | |
| "name": "task", | |
| "dataType": ["text"], | |
| "description": "The task description", | |
| }, | |
| { | |
| "name": "content", | |
| "dataType": ["text"], | |
| "description": "The memory content", | |
| }, | |
| { | |
| "name": "type", | |
| "dataType": ["string"], | |
| "description": "The type of memory", | |
| }, | |
| { | |
| "name": "timestamp", | |
| "dataType": ["number"], | |
| "description": "When the memory was created", | |
| } | |
| ] | |
| } | |
| client.schema.create_class(class_obj) | |
| self.vector_db_client = client | |
| self.vector_db_collection = "AgentMemory" # Collection is the class name in Weaviate | |
| logger.info("Weaviate vector database initialized") | |
| except Exception as e: | |
| logger.error(f"Error initializing Weaviate: {str(e)}") | |
| # Fall back to in-memory vector store | |
| await self._initialize_in_memory_vector_store() | |
| async def _initialize_in_memory_vector_store(self): | |
| """Initialize a simple in-memory vector store as fallback.""" | |
| logger.warning("Using in-memory vector store (data will not persist)") | |
| # Simple in-memory vector store | |
| self.vector_db_client = { | |
| "vectors": [], # List of (id, vector, metadata) tuples | |
| "type": "in-memory" | |
| } | |
| self.vector_db_collection = "memory" | |
| self.vector_db_type = "in-memory" | |
| async def store_experience(self, task: str, actions: List[Dict], outcome: Dict) -> str: | |
| """ | |
| Store an experience in long-term memory. | |
| Args: | |
| task: Task description | |
| actions: List of action steps taken | |
| outcome: Result of the task execution | |
| Returns: | |
| str: ID of the stored memory | |
| """ | |
| try: | |
| # Generate a unique ID for this memory | |
| memory_id = str(uuid.uuid4()) | |
| # Format the experience for storage | |
| content = json.dumps({ | |
| "task": task, | |
| "actions": actions, | |
| "outcome": outcome, | |
| "timestamp": time.time() | |
| }) | |
| # Get embedding for the task and content | |
| embedding = await self._get_embedding(f"{task}\n{content}") | |
| # Store in vector database | |
| if self.vector_db_type == "chromadb": | |
| self.vector_db_collection.add( | |
| ids=[memory_id], | |
| embeddings=[embedding], | |
| metadatas=[{"task": task, "success": outcome.get("success", False)}], | |
| documents=[content] | |
| ) | |
| elif self.vector_db_type == "pinecone": | |
| self.vector_db_collection.upsert([ | |
| (memory_id, embedding, {"task": task, "success": outcome.get("success", False), "content": content}) | |
| ]) | |
| elif self.vector_db_type == "weaviate": | |
| self.vector_db_client.data_object.create( | |
| { | |
| "task": task, | |
| "content": content, | |
| "type": "experience", | |
| "timestamp": time.time() | |
| }, | |
| class_name=self.vector_db_collection, | |
| uuid=memory_id, | |
| vector=embedding | |
| ) | |
| elif self.vector_db_type == "in-memory": | |
| self.vector_db_client["vectors"].append(( | |
| memory_id, | |
| embedding, | |
| { | |
| "task": task, | |
| "success": outcome.get("success", False), | |
| "content": content | |
| } | |
| )) | |
| logger.info(f"Experience stored in memory with ID {memory_id}") | |
| return memory_id | |
| except Exception as e: | |
| logger.error(f"Error storing experience: {str(e)}") | |
| return "" | |
| async def retrieve_similar_tasks(self, current_task: str, limit: int = 5) -> List[Dict]: | |
| """ | |
| Retrieve experiences related to the current task. | |
| Args: | |
| current_task: Current task description | |
| limit: Maximum number of experiences to retrieve | |
| Returns: | |
| List[Dict]: List of related experiences | |
| """ | |
| try: | |
| # Get embedding for the current task | |
| embedding = await self._get_embedding(current_task) | |
| results = [] | |
| # Query vector database based on type | |
| if self.vector_db_type == "chromadb": | |
| query_results = self.vector_db_collection.query( | |
| query_embeddings=[embedding], | |
| n_results=limit | |
| ) | |
| for i, (doc_id, doc, metadata, distance) in enumerate(zip( | |
| query_results["ids"][0], | |
| query_results["documents"][0], | |
| query_results["metadatas"][0], | |
| query_results["distances"][0] | |
| )): | |
| # Parse stored JSON | |
| content = json.loads(doc) | |
| results.append({ | |
| "id": doc_id, | |
| "task": metadata["task"], | |
| "actions": content["actions"], | |
| "outcome": content["outcome"], | |
| "similarity": 1 - distance # Convert distance to similarity score | |
| }) | |
| elif self.vector_db_type == "pinecone": | |
| query_results = self.vector_db_collection.query( | |
| vector=embedding, | |
| top_k=limit, | |
| include_metadata=True | |
| ) | |
| for match in query_results["matches"]: | |
| # Parse stored JSON | |
| content = json.loads(match["metadata"]["content"]) | |
| results.append({ | |
| "id": match["id"], | |
| "task": match["metadata"]["task"], | |
| "actions": content["actions"], | |
| "outcome": content["outcome"], | |
| "similarity": match["score"] | |
| }) | |
| elif self.vector_db_type == "weaviate": | |
| query_results = ( | |
| self.vector_db_client.query | |
| .get(self.vector_db_collection, ["task", "content", "_additional {certainty}"]) | |
| .with_near_vector({"vector": embedding}) | |
| .with_limit(limit) | |
| .do() | |
| ) | |
| for obj in query_results["data"]["Get"][self.vector_db_collection]: | |
| # Parse stored JSON | |
| content = json.loads(obj["content"]) | |
| results.append({ | |
| "id": obj["_additional"]["id"], | |
| "task": obj["task"], | |
| "actions": content["actions"], | |
| "outcome": content["outcome"], | |
| "similarity": obj["_additional"]["certainty"] | |
| }) | |
| elif self.vector_db_type == "in-memory": | |
| # Simple cosine similarity search | |
| similarities = [] | |
| for memory_id, memory_vector, metadata in self.vector_db_client["vectors"]: | |
| similarity = self._cosine_similarity(embedding, memory_vector) | |
| similarities.append((memory_id, similarity, metadata)) | |
| # Sort by similarity and get top results | |
| similarities.sort(key=lambda x: x[1], reverse=True) | |
| for memory_id, similarity, metadata in similarities[:limit]: | |
| # Parse stored JSON | |
| content = json.loads(metadata["content"]) | |
| results.append({ | |
| "id": memory_id, | |
| "task": metadata["task"], | |
| "actions": content["actions"], | |
| "outcome": content["outcome"], | |
| "similarity": similarity | |
| }) | |
| logger.info(f"Retrieved {len(results)} similar tasks") | |
| return results | |
| except Exception as e: | |
| logger.error(f"Error retrieving similar tasks: {str(e)}") | |
| return [] | |
| def record_action(self, action: Dict, result: Dict): | |
| """ | |
| Record an action in short-term memory. | |
| Args: | |
| action: Action configuration | |
| result: Action execution result | |
| """ | |
| # Add to recent actions list | |
| self.recent_actions.append({ | |
| "timestamp": time.time(), | |
| "action": action, | |
| "result": result | |
| }) | |
| # Maintain maximum size | |
| if len(self.recent_actions) > self.max_recent_actions: | |
| self.recent_actions.pop(0) # Remove oldest action | |
| def record_state(self, url: str, state: Dict): | |
| """ | |
| Record a page state in short-term memory. | |
| Args: | |
| url: Page URL | |
| state: Page state data | |
| """ | |
| # Add to recent states map | |
| self.recent_states[url] = { | |
| "timestamp": time.time(), | |
| "state": state | |
| } | |
| # Maintain maximum size | |
| if len(self.recent_states) > self.max_recent_states: | |
| # Remove oldest state | |
| oldest_url = min(self.recent_states.keys(), key=lambda k: self.recent_states[k]["timestamp"]) | |
| del self.recent_states[oldest_url] | |
| def get_recent_actions(self, count: int = 10) -> List[Dict]: | |
| """ | |
| Get recent actions from short-term memory. | |
| Args: | |
| count: Maximum number of actions to retrieve | |
| Returns: | |
| List[Dict]: List of recent actions | |
| """ | |
| return self.recent_actions[-count:] | |
| def get_recent_state(self, url: str) -> Optional[Dict]: | |
| """ | |
| Get the recent state for a URL. | |
| Args: | |
| url: Page URL | |
| Returns: | |
| Optional[Dict]: Recent state if available | |
| """ | |
| return self.recent_states.get(url, {}).get("state") | |
| async def _get_embedding(self, text: str) -> List[float]: | |
| """ | |
| Get an embedding vector for text. | |
| Args: | |
| text: Text to embed | |
| Returns: | |
| List[float]: Embedding vector | |
| """ | |
| if self.embedding_client: | |
| try: | |
| response = await self.embedding_client.embeddings.create( | |
| input=text, | |
| model=self.embedding_model | |
| ) | |
| return response.data[0].embedding | |
| except Exception as e: | |
| logger.error(f"Error getting embedding: {str(e)}") | |
| # Fallback to random embedding if client fails | |
| return list(np.random.randn(self.vector_db_dimensions).astype(float)) | |
| def _cosine_similarity(self, vec_a: List[float], vec_b: List[float]) -> float: | |
| """ | |
| Calculate cosine similarity between two vectors. | |
| Args: | |
| vec_a: First vector | |
| vec_b: Second vector | |
| Returns: | |
| float: Cosine similarity (-1 to 1) | |
| """ | |
| dot_product = sum(a * b for a, b in zip(vec_a, vec_b)) | |
| norm_a = sum(a * a for a in vec_a) ** 0.5 | |
| norm_b = sum(b * b for b in vec_b) ** 0.5 | |
| if norm_a == 0 or norm_b == 0: | |
| return 0 | |
| return dot_product / (norm_a * norm_b) | |
| async def shutdown(self): | |
| """Clean up resources.""" | |
| logger.info("ContinuousMemory resources cleaned up") | |