Text Generation
Transformers
Diffusers
Safetensors
English
gpt_oss
phillnet-2
gpt-oss
multimodal
image-generation
video-generation
speech
audio
custom-code
conversational
custom_code
Instructions to use ayjays132/Phillnet-2 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use ayjays132/Phillnet-2 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="ayjays132/Phillnet-2", trust_remote_code=True)
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("ayjays132/Phillnet-2", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("ayjays132/Phillnet-2", trust_remote_code=True)
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```

- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use ayjays132/Phillnet-2 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "ayjays132/Phillnet-2"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "ayjays132/Phillnet-2",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```

Use Docker
docker model run hf.co/ayjays132/Phillnet-2
- SGLang
How to use ayjays132/Phillnet-2 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "ayjays132/Phillnet-2" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "ayjays132/Phillnet-2",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```

Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "ayjays132/Phillnet-2" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "ayjays132/Phillnet-2",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```

- Docker Model Runner
How to use ayjays132/Phillnet-2 with Docker Model Runner:
docker model run hf.co/ayjays132/Phillnet-2
| """ | |
| Memory Graph Module | |
| Graph-based memory storage system with compression, retrieval, and RAG integration. | |
| """ | |
| import os | |
| import json | |
| import gzip | |
| import base64 | |
| import time | |
| import torch | |
| import torch.nn.functional as F | |
| import networkx as nx | |
| import numpy as np | |
| import re | |
| from collections import deque | |
| from typing import List, Optional, Any | |
| from networkx.readwrite import json_graph | |
| from sklearn.decomposition import PCA | |
| try: | |
| import config as _config_module # optional top-level config module | |
| except ModuleNotFoundError: | |
| _config_module = None # graceful — MemoryGraph works without it | |
| try: | |
| from rag.rag_generator import RAGGenerator | |
| except ImportError: | |
| RAGGenerator = None | |
| import sys | |
| # Try to import shared model for encoder | |
| SHARED_MODEL_AVAILABLE = False | |
| get_shared_model = None | |
| get_shared_tokenizer = None | |
| get_shared_adapter = None | |
| try: | |
| from shared_model import get_shared_adapter, get_shared_model, get_shared_tokenizer | |
| SHARED_MODEL_AVAILABLE = True | |
| except Exception: | |
| pass | |
| class MemoryGraph: | |
| __slots__ = ( | |
| 'device', 'graph_file', 'max_nodes', 'save_interval', 'similarity_threshold', | |
| 'compression_level', 'quantization_bits', 'pca', 'original_dim', | |
| 'graph', 'id_counter', 'addition_count', 'node_queue', | |
| 'access_counts', 'last_accessed', 'importance_scores', 'encoder', 'rag_generator', | |
| 'use_learnable_lru', 'importance_decay', 'access_boost', 'connectivity_weight', | |
| 'vae_compressor', 'use_vae_compression', 'vae_latent_dim', 'memory_pressure_threshold', | |
| 'node_types', 'temporal_connections', 'consolidation_buffer', 'forgetting_curves', | |
| 'use_combined_compression', 'pca_on_vae', 'memory_hierarchies', | |
| 'enable_memory_types', 'enable_temporal_connections', 'enable_consolidation', | |
| 'shared_model', 'shared_tokenizer', 'enable_vae_training', 'vae_learning_rate', 'vae_weights_path', | |
| 'embedding_cache', 'cache_hits', 'cache_misses', 'cache_max_size', | |
| 'forgetation_model', 'forgetation_history', 'forgetation_learning_rate', 'forgetation_buffer', 'forgetation_optimizer', | |
| 'forgetation_shared_model', 'forgetation_shared_inner', 'forgetation_shared_dim', 'shared_adapter', | |
| 'last_save_time', 'save_interval_seconds', 'min_save_interval' | |
| ) | |
def __init__(
    self,
    device: str = "cuda",
    graph_file: str = 'memory_graph.json.gz',
    max_nodes: Optional[int] = None,  # None = infinite growth
    save_interval: int = 100,
    save_interval_seconds: int = 120,
    min_save_interval: int = 10,
    similarity_threshold: float = 0.75,
    compression_level: float = 0.5,
    quantization_bits: int = 8,
    rag_generator_instance: Optional[RAGGenerator] = None,
    use_vae_compression: bool = True,  # Use VAE + PCA combined compression
    vae_latent_dim: int = 128,  # VAE compression target (512 -> 128 = 4x compression)
    memory_pressure_threshold: float = 0.85,  # Only prune when GPU memory > 85%
    use_combined_compression: bool = True,  # Use VAE -> PCA pipeline (no fallbacks)
    enable_memory_types: bool = True,  # Enable different memory node types
    enable_temporal_connections: bool = True,  # Enable temporal memory connections
    enable_consolidation: bool = True,  # Enable memory consolidation
    shared_model: Optional[Any] = None,  # Shared model for memory efficiency
    shared_tokenizer: Optional[Any] = None,  # Shared tokenizer
    enable_vae_training: bool = True,  # Enable VAE learning over time
    vae_learning_rate: float = 1e-4  # VAE learning rate
):
    """Configure the memory graph and load any previously saved graph.

    Args:
        device: Requested device. Any value other than ``'cpu'`` selects
            ``cuda`` when CUDA is available; otherwise CPU is used.
        graph_file: Graph file name or path. It is resolved through
            ``_resolve_graph_path`` and normalized to end in ``.gz``.
        max_nodes: Node limit; ``None`` means no hard limit.
        save_interval: Upper bound on the number of additions between saves
            (see ``_dynamic_save_interval``).
        save_interval_seconds: Seconds after which a save is triggered
            regardless of the addition count (see ``_should_save``).
        min_save_interval: Lower bound on the dynamic save interval
            (clamped to at least 1).
        similarity_threshold: Similarity above which ``add_experience``
            links two nodes.
        compression_level: Fraction of the original dimension kept by the
            legacy PCA compressor.
        quantization_bits: Bit width used to derive the quantization scale.
        rag_generator_instance: Stored as ``self.rag_generator``.
        use_vae_compression: Enable the VAE compression path.
        vae_latent_dim: Latent size passed to the VAE compressor.
        memory_pressure_threshold: GPU reserved/total ratio above which
            ``add_experience`` may prune in unlimited mode.
        use_combined_compression: Together with ``use_vae_compression``,
            makes the ``neuro_fusion`` import mandatory.
        enable_memory_types: Track a memory type per node.
        enable_temporal_connections: Link each new node to the previous one.
        enable_consolidation: Keep a forgetting-curve record per node.
        shared_model: Model object forwarded to the VAE compressor.
        shared_tokenizer: Tokenizer object forwarded to the VAE compressor.
        enable_vae_training: Allow incremental VAE training on compression.
        vae_learning_rate: Learning rate forwarded to the VAE compressor.

    Raises:
        ImportError: If both VAE flags are set and ``neuro_fusion`` cannot
            be imported.
    """
    # Core setup - FORCE CUDA if available
    if torch.cuda.is_available() and device != 'cpu':
        self.device = torch.device("cuda")
    else:
        # NOTE(review): this expression always evaluates to 'cpu'.
        self.device = torch.device(device if device == 'cpu' else 'cpu')
    graph_file = self._resolve_graph_path(graph_file)
    # Normalize graph_file to always use .gz extension for compressed storage
    if graph_file.endswith('.gz'):
        self.graph_file = graph_file
    elif graph_file.endswith('.json'):
        # Convert .json to .json.gz
        self.graph_file = graph_file + '.gz'
        # Only announces the migration; the actual load/convert happens further down.
        if os.path.exists(graph_file) and not os.path.exists(self.graph_file):
            print(f"[MIGRATE] Found uncompressed {graph_file}, will migrate to compressed format on next save")
    else:
        # Add .gz extension if not present
        self.graph_file = graph_file + '.gz'
    self.max_nodes = max_nodes  # None = infinite growth
    self.save_interval = save_interval
    self.save_interval_seconds = int(save_interval_seconds)
    self.min_save_interval = max(1, int(min_save_interval))
    self.last_save_time = time.time()
    self.similarity_threshold = similarity_threshold
    self.compression_level = compression_level
    self.quantization_bits = quantization_bits
    self.use_vae_compression = use_vae_compression
    self.vae_latent_dim = vae_latent_dim
    self.memory_pressure_threshold = memory_pressure_threshold
    self.use_combined_compression = use_combined_compression  # VAE -> PCA pipeline
    self.enable_memory_types = enable_memory_types
    self.enable_temporal_connections = enable_temporal_connections
    self.enable_consolidation = enable_consolidation
    # Shared model attributes for memory efficiency
    self.shared_model = shared_model
    self.shared_tokenizer = shared_tokenizer
    self.shared_adapter = None
    if SHARED_MODEL_AVAILABLE and get_shared_adapter is not None:
        try:
            self.shared_adapter = get_shared_adapter()
        except Exception:
            self.shared_adapter = None
    # When an adapter is available it overrides the model/tokenizer passed by the caller.
    if self.shared_adapter is not None:
        self.shared_model = self.shared_adapter.get_model()
        self.shared_tokenizer = self.shared_adapter.get_tokenizer()
    # Last resort: ask the shared_model module directly.
    if self.shared_model is None and SHARED_MODEL_AVAILABLE and get_shared_model is not None:
        try:
            self.shared_model = get_shared_model()
            if self.shared_model is not None:
                print("[MEMORY] Using shared model for memory efficiency (zero overhead)")
        except Exception as e:
            print(f"[WARN] Could not get shared model: {e}")
    # VAE training settings
    self.enable_vae_training = enable_vae_training
    self.vae_learning_rate = vae_learning_rate
    # Internal state
    self.pca = None
    self.pca_on_vae = None  # PCA applied to VAE latent space
    self.original_dim = None
    self.vae_compressor = None  # VAE compressor, created lazily on the first embedding
    # NOTE(review): graph_file always ends in '.gz' after the normalization above,
    # so the else arm of this conditional expression is never taken.
    self.vae_weights_path = self.graph_file.replace('.json.gz', '_vae_weights.pt') if self.graph_file.endswith('.gz') else self.graph_file.replace('.json', '_vae_weights.pt')  # Path to save/load VAE weights
    # Embedding cache: compressed results keyed by caller-supplied hash
    self.embedding_cache = {}  # Cache compressed embeddings by hash
    self.cache_hits = 0
    self.cache_misses = 0
    self.cache_max_size = 1000  # Maximum number of cached entries
    self.graph = nx.Graph()
    self.id_counter = 0
    self.addition_count = 0
    self.node_queue = deque()
    self.access_counts = {}
    self.last_accessed = {}
    self.importance_scores = {}  # Importance score per node id
    self.encoder = None  # Shared model will be used for query encoding
    self.rag_generator = rag_generator_instance  # Assign the passed RAG instance
    # Memory-type bookkeeping
    self.node_types = {}  # Track memory node types: 'episodic', 'semantic', 'procedural', 'working'
    self.temporal_connections = {}  # Temporal sequence connections between memories
    self.consolidation_buffer = []  # Buffer for memory consolidation
    self.forgetting_curves = {}  # Forgetting-curve record for each node
    self.memory_hierarchies = {}  # Hierarchical memory organization
    # Learnable LRU parameters
    self.use_learnable_lru = True  # Enable learnable importance-based eviction
    self.importance_decay = 0.95  # Decay factor for importance over time
    self.access_boost = 0.1  # Boost to importance when accessed
    self.connectivity_weight = 0.3  # Weight for graph connectivity in importance
    # Learnable forgetting system state; the model itself is built by
    # _initialize_forgetation_model() below.
    self.forgetation_model = None  # Neural network that learns forgetting patterns
    self.forgetation_history = []  # History of what was forgotten and if it was a mistake
    self.forgetation_learning_rate = 1e-3  # Learning rate for forgetation model
    self.forgetation_buffer = deque(maxlen=100)  # Buffer for training forgetation model
    self.forgetation_optimizer = None  # Optimizer for forgetation model
    self._initialize_forgetation_model()
    # Combined VAE + PCA compression: only verify here that the VAE class can be imported.
    if self.use_vae_compression and self.use_combined_compression:
        try:
            # Imported purely as an availability check; the compressor is built
            # later, once the first embedding reveals the input dimension.
            from neuro_fusion import VAECompressor
            print(f"[AGI] Combined VAE+PCA compression enabled (target latent dim: {vae_latent_dim})")
            print(f"[AGI] Compression pipeline: Original -> VAE -> PCA -> Quantization (no fallbacks)")
        except ImportError:
            # NOTE(review): the outer condition already requires
            # use_combined_compression, so the else arm below is unreachable.
            if self.use_combined_compression:
                print("[ERROR] VAE compressor required for combined compression - cannot proceed without it")
                raise ImportError("VAE compressor is required for AGI memory system")
            else:
                print("[WARN] VAE compressor not available, but combined compression disabled - will use legacy mode")
                self.use_vae_compression = False
    # Load existing graph if available (but preserve max_nodes setting)
    saved_max_nodes = self.max_nodes
    # Try loading compressed version first, then fallback to uncompressed for migration
    if os.path.exists(self.graph_file):
        self.load_graph()
        # Restore max_nodes setting (don't let loaded graph override infinite growth)
        # NOTE(review): only the None case is restored; whether load_graph() can
        # overwrite a numeric limit is not visible here - confirm.
        if saved_max_nodes is None:
            self.max_nodes = None  # Ensure infinite growth is preserved
    else:
        # Check for old uncompressed version for migration
        # NOTE(review): str.replace removes every '.gz' occurrence in the path,
        # including ones inside directory names.
        old_file = self.graph_file.replace('.gz', '')
        if old_file.endswith('.json') and os.path.exists(old_file):
            print(f"[MIGRATE] Found old uncompressed file {old_file}, loading and will convert to compressed format")
            # Temporarily use old file for loading
            temp_file = self.graph_file
            self.graph_file = old_file
            self.load_graph()
            self.graph_file = temp_file  # Restore .gz path
            # Restore max_nodes setting
            if saved_max_nodes is None:
                self.max_nodes = None
            # Save immediately in compressed format
            self.save_graph()
            # Clean up old uncompressed file
            try:
                os.remove(old_file)
                print(f"[CLEANUP] Removed old uncompressed file: {old_file}")
            except Exception as e:
                print(f"[WARN] Could not remove old file {old_file}: {e}")
    max_nodes_str = "INFINITE" if self.max_nodes is None else str(self.max_nodes)
    compression_str = "VAE" if self.use_vae_compression else "PCA"
    print(f"[NETWORK] Initialized MemoryGraph on {self.device} | "
          f"Max Nodes={max_nodes_str}, Save Interval={self.save_interval}, Compression={compression_str}")
    # Debug: Verify max_nodes is correct
    if self.max_nodes is not None:
        print(f"[WARNING] max_nodes is set to {self.max_nodes} - this will limit growth!")
    else:
        print(f"[CONFIG] Infinite growth enabled - graph will grow without hard limit (using VAE compression for efficiency)")
| def _alpha_ratio(self, text: str) -> float: | |
| if not text: | |
| return 0.0 | |
| alpha = sum(1 for ch in text if ch.isalpha()) | |
| total = max(len(text), 1) | |
| return alpha / total | |
| def _sanitize_text(self, text: str, max_chars: int = 2000) -> str: | |
| if not text: | |
| return "" | |
| cleaned = text.strip() | |
| cleaned = re.sub(r"```.*?```", "", cleaned, flags=re.DOTALL) | |
| cleaned = cleaned.replace("```", "") | |
| cleaned = re.sub(r"(?i)\b(final answer|this is the final answer|answer:)\b", "", cleaned) | |
| cleaned = re.sub(r"(?i)here'?s my response:\s*", "", cleaned) | |
| cleaned = re.sub(r"<[^>]+>", "", cleaned) | |
| cleaned = re.sub(r"\s+\n", "\n", cleaned).strip() | |
| if len(cleaned) > max_chars: | |
| cleaned = cleaned[:max_chars].rstrip() + "..." | |
| return cleaned | |
| def _is_low_quality_chunk(self, text: str) -> bool: | |
| if not text or len(text) < 20: | |
| return True | |
| return self._alpha_ratio(text) < 0.6 | |
| def _score_context(self, similarity: float, importance: float, last_accessed: float) -> float: | |
| recency_hours = max((time.time() - last_accessed) / 3600.0, 0.0) | |
| recency = 1.0 / (1.0 + recency_hours / 24.0) | |
| return float(similarity) * (0.7 + 0.3 * float(importance)) * recency | |
| def _resolve_graph_path(self, graph_file: str) -> str: | |
| if os.path.isabs(graph_file): | |
| path = graph_file | |
| else: | |
| project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) | |
| memory_root = os.path.join(project_root, "memory") | |
| path = os.path.join(memory_root, graph_file) | |
| os.makedirs(os.path.dirname(path), exist_ok=True) | |
| return path | |
| def _dynamic_save_interval(self) -> int: | |
| size = max(self.graph.number_of_nodes(), 1) | |
| target = int(size * 0.02) | |
| target = max(self.min_save_interval, target) | |
| return min(self.save_interval, target) | |
| def _should_save(self) -> bool: | |
| additions_trigger = self.addition_count >= self._dynamic_save_interval() | |
| time_trigger = (time.time() - self.last_save_time) >= self.save_interval_seconds | |
| return additions_trigger or time_trigger | |
| def _initialize_vae_compressor(self, embedding: torch.Tensor): | |
| """Initializes the AGI VAE compressor with shared backbone references for zero-overhead.""" | |
| if self.vae_compressor is not None: | |
| return | |
| try: | |
| from neuro_fusion import VAECompressor | |
| input_dim = embedding.size(-1) | |
| self.original_dim = input_dim | |
| # AGI breakthrough: use 2x hidden for better latent mapping | |
| hidden_dim = input_dim * 2 | |
| self.vae_compressor = VAECompressor( | |
| input_dim=input_dim, | |
| hidden_dim=hidden_dim, | |
| latent_dim=self.vae_latent_dim, | |
| shared_model=self.shared_model, | |
| shared_tokenizer=self.shared_tokenizer, | |
| enable_training=self.enable_vae_training, | |
| learning_rate=self.vae_learning_rate, | |
| device=self.device | |
| ) | |
| print(f"[MEMORY] VAE Compressor initialized (input_dim={input_dim}, latent_dim={self.vae_latent_dim}) [Unified: {self.shared_model is not None}]") | |
| except Exception as e: | |
| print(f"[WARN] VAE Initialization failed: {e}. Falling back to legacy PCA.") | |
| self.use_vae_compression = False | |
| def _initialize_compressor(self, embedding: torch.Tensor): | |
| """Legacy PCA-only initialization.""" | |
| self.original_dim = embedding.size(-1) | |
| target_dim = max(int(self.original_dim * self.compression_level), 1) | |
| try: | |
| self.pca = PCA(n_components=target_dim) | |
| print(f"[CONFIG] Legacy PCA initialized with {target_dim} components.") | |
| except Exception as e: | |
| print(f"[WARN] PCA Initialization failed: {e}") | |
| self.pca = None | |
| def _initialize_forgetation_model(self): | |
| """Initializes the neural network that learns the Ebbinghaus forgetting patterns.""" | |
| try: | |
| import torch.nn as nn | |
| from torch.optim import Adam | |
| # 5 features: [access, time, connectivity, type, consolidation] | |
| self.forgetation_model = nn.Sequential( | |
| nn.Linear(5, 16), | |
| nn.ReLU(), | |
| nn.Linear(16, 8), | |
| nn.ReLU(), | |
| nn.Linear(8, 1), | |
| nn.Sigmoid() | |
| ).to_empty(device=self.device) | |
| # ⚡ Fix: Since to_empty doesn't initialize parameters, we must call reset_parameters or similar if needed. | |
| # But nn.Sequential doesn't have it, so we iterate. | |
| for layer in self.forgetation_model: | |
| if hasattr(layer, 'reset_parameters'): | |
| layer.reset_parameters() | |
| self.forgetation_optimizer = Adam(self.forgetation_model.parameters(), lr=self.forgetation_learning_rate) | |
| print("[FORGETATIONS] Neural forgetting system active.") | |
| except Exception as e: | |
| print(f"[WARN] Failed to initialize forgetation system: {e}") | |
| def _compress_embedding(self, embedding: torch.Tensor, embedding_hash: Optional[str] = None) -> dict: | |
| """Compresses embedding via AGI pipeline: VAE -> PCA -> Quantization -> Base64.""" | |
| # ⚡ INFINITE INTELLIGENCE: Zero-overhead cache check | |
| if embedding_hash is not None and embedding_hash in self.embedding_cache: | |
| self.cache_hits += 1 | |
| return self.embedding_cache[embedding_hash] | |
| self.cache_misses += 1 | |
| # Auto-init if needed | |
| if self.use_vae_compression and self.vae_compressor is None: | |
| self._initialize_vae_compressor(embedding) | |
| if self.use_vae_compression and self.vae_compressor is not None: | |
| # ⚡ OPTIMIZATION: Process in float32 for VAE precision | |
| embedding_f32 = embedding.to(torch.float32).unsqueeze(0) | |
| # Learn from this experience (incremental training) | |
| if self.enable_vae_training and self.vae_compressor.is_trainable and hasattr(self.vae_compressor, "train_incremental"): | |
| self.vae_compressor.train_incremental(embedding_f32, beta=1.0) | |
| with torch.no_grad(): | |
| # Stage 1: VAE Encode | |
| mu, _ = self.vae_compressor._encode(embedding_f32) | |
| vae_latent = mu.squeeze(0).cpu().numpy().reshape(1, -1) | |
| # Stage 2: PCA on VAE space | |
| if self.pca_on_vae is not None: | |
| if not hasattr(self.pca_on_vae, 'components_'): | |
| self.consolidation_buffer.append(vae_latent[0]) | |
| if len(self.consolidation_buffer) >= 20: | |
| self.pca_on_vae.fit(np.array(self.consolidation_buffer)) | |
| compressed = self.pca_on_vae.transform(vae_latent)[0] | |
| self.consolidation_buffer = [] | |
| else: | |
| compressed = vae_latent[0] | |
| else: | |
| compressed = self.pca_on_vae.transform(vae_latent)[0] | |
| else: | |
| compressed = vae_latent[0] | |
| # Stage 3: Quantization | |
| scale = (2 ** (self.quantization_bits - 1) - 1) | |
| mn, mx = compressed.min(), compressed.max() | |
| rng = max(abs(mx - mn), 1e-5) | |
| norm = (compressed - mn) / rng | |
| quant = np.round(norm * scale).astype(np.int8) | |
| # Stage 4: Base64 | |
| quant_b64 = base64.b64encode(quant.tobytes()).decode('ascii') | |
| result = { | |
| 'data': quant_b64, | |
| 'format': 'base64_int8', | |
| 'min': float(mn), | |
| 'max': float(mx), | |
| 'bits': self.quantization_bits, | |
| 'compressed': True, | |
| 'vae': True, | |
| 'pca_on_vae': self.pca_on_vae is not None, | |
| 'latent_dim': self.vae_latent_dim, | |
| 'final_dim': len(compressed), | |
| 'shape': list(quant.shape) | |
| } | |
| # Cache results | |
| if embedding_hash is not None: | |
| if len(self.embedding_cache) >= self.cache_max_size: | |
| # FIFO eviction | |
| del self.embedding_cache[next(iter(self.embedding_cache))] | |
| self.embedding_cache[embedding_hash] = result | |
| return result | |
| else: | |
| # Legacy PCA Fallback | |
| if self.pca is None: | |
| self._initialize_compressor(embedding) | |
| emb_np = embedding.cpu().detach().numpy().reshape(1, -1) | |
| if hasattr(self.pca, 'components_'): | |
| compressed = self.pca.transform(emb_np)[0] | |
| else: | |
| compressed = emb_np[0] | |
| # Simple Quant/B64 | |
| scale = (2 ** (self.quantization_bits - 1) - 1) | |
| mn, mx = compressed.min(), compressed.max() | |
| rng = max(abs(mx - mn), 1e-5) | |
| norm = (compressed - mn) / rng | |
| quant = np.round(norm * scale).astype(np.int8) | |
| quant_b64 = base64.b64encode(quant.tobytes()).decode('ascii') | |
| return { | |
| 'data': quant_b64, 'format': 'base64_int8', 'min': float(mn), 'max': float(mx), | |
| 'bits': self.quantization_bits, 'compressed': True, 'vae': False, | |
| 'shape': list(quant.shape) | |
| } | |
| def _decompress_embedding(self, compressed_data): | |
| """Decompress embedding using AGI VAE or heritage PCA.""" | |
| if isinstance(compressed_data, dict) and compressed_data.get('vae', False): | |
| # Prep VAE | |
| if self.vae_compressor is None: | |
| latent_dim = compressed_data.get('latent_dim', self.vae_latent_dim) | |
| if self.original_dim is None: self.original_dim = 512 # Heuristic | |
| self._initialize_vae_compressor(torch.randn(self.original_dim)) | |
| try: | |
| data = compressed_data['data'] | |
| if isinstance(data, str) and compressed_data.get('format') == 'base64_int8': | |
| arr = np.frombuffer(base64.b64decode(data), dtype=np.int8) | |
| if 'shape' in compressed_data: arr = arr.reshape(compressed_data['shape']) | |
| # Dequantize | |
| scale = (2 ** (compressed_data['bits'] - 1) - 1) | |
| mn, mx = compressed_data['min'], compressed_data['max'] | |
| rng = max(abs(mx - mn), 1e-5) | |
| arr = arr.astype(np.float32) / scale * rng + mn | |
| else: | |
| arr = np.array(data) | |
| # Inverse PCA on VAE | |
| if compressed_data.get('pca_on_vae', False) and self.pca_on_vae is not None: | |
| arr = self.pca_on_vae.inverse_transform(arr.reshape(1, -1))[0] | |
| # VAE Decode | |
| z_tensor = torch.tensor(arr, device=self.device, dtype=torch.float32).unsqueeze(0) | |
| with torch.no_grad(): | |
| reconstructed = self.vae_compressor._decode(z_tensor) | |
| return reconstructed.squeeze(0).to(self.device).to(torch.float16) | |
| except Exception as e: | |
| print(f"[WARN] VAE Decompression error: {e}") | |
| # Standard Fallback | |
| if isinstance(compressed_data, dict) and 'data' in compressed_data: | |
| data = compressed_data['data'] | |
| if isinstance(data, str): | |
| arr = np.frombuffer(base64.b64decode(data), dtype=np.int8 if 'int8' in compressed_data.get('format','') else np.float32) | |
| if 'shape' in compressed_data: arr = arr.reshape(compressed_data['shape']) | |
| if 'bits' in compressed_data and 'int8' in compressed_data.get('format',''): | |
| scale = (2 ** (compressed_data['bits'] - 1) - 1) | |
| mn, mx = compressed_data['min'], compressed_data['max'] | |
| rng = max(abs(mx - mn), 1e-5) | |
| arr = arr.astype(np.float32) / scale * rng + mn | |
| else: | |
| arr = np.array(data) | |
| else: | |
| arr = np.array(compressed_data) | |
| if self.pca is not None and hasattr(self.pca, 'components_'): | |
| if arr.shape[0] < self.pca.n_components_: | |
| arr = np.pad(arr, (0, self.pca.n_components_ - arr.shape[0])) | |
| arr = self.pca.inverse_transform(arr.reshape(1, -1))[0] | |
| return torch.tensor(arr, device=self.device, dtype=torch.float16) | |
def add_experience(self, embedding: torch.Tensor, metadata=None, memory_type=None):
    """
    Add one experience to the graph as a new node.

    The embedding is compressed and stored, per-node bookkeeping is
    initialized, the node is linked to the previous node and to similar
    recent nodes, and pruning and saving are triggered when their conditions
    are met.

    Args:
        embedding (torch.Tensor): The embedding of the experience.
        metadata (dict, optional): Additional metadata for the experience.
            Must include 'original_text' for RAG to function properly.
            The dict is modified in place ('original_text' is defaulted or
            sanitized, and 'memory_type' may be added).
        memory_type (str, optional): Memory type - 'episodic', 'semantic', 'procedural', 'working'.
            Auto-detected if not provided and memory types are enabled;
            otherwise defaults to 'episodic'.

    Returns:
        None.
    """
    if metadata is None:
        metadata = {}
    if 'original_text' not in metadata:
        print("[WARN] Warning: 'original_text' not found in metadata. RAG retrieval might be limited for this experience.")
        metadata['original_text'] = ""  # Provide a default empty string
    else:
        metadata['original_text'] = self._sanitize_text(metadata.get('original_text', ''), max_chars=2000)
    # Auto-detect memory type if not provided
    if memory_type is None and self.enable_memory_types:
        memory_type = self._detect_memory_type(metadata, embedding)
    elif memory_type is None:
        memory_type = 'episodic'  # Default
    # Store memory type
    if self.enable_memory_types:
        metadata['memory_type'] = memory_type
    # No hash is passed, so the embedding cache is bypassed for stored nodes.
    compressed = self._compress_embedding(embedding)
    nid = self.id_counter
    current_time = time.time()
    self.graph.add_node(nid, embedding=compressed, metadata=metadata)
    self.node_queue.append(nid)
    self.access_counts[nid] = 0
    self.last_accessed[nid] = current_time
    # Record the node's memory type
    if self.enable_memory_types:
        self.node_types[nid] = memory_type
    # Initialize importance score (new nodes start with moderate importance)
    self.importance_scores[nid] = 0.5
    # Initialize the forgetting-curve record for this node
    if self.enable_consolidation:
        self.forgetting_curves[nid] = {
            'initial_strength': 1.0,
            'decay_rate': 0.1,  # Starting decay rate
            'last_accessed': current_time,
            'access_count': 0,
            'consolidation_level': 0.0  # Starts at zero for a new node
        }
    # Temporal connection to the previously added node
    if self.enable_temporal_connections and len(self.node_queue) > 1:
        prev_nid = self.node_queue[-2]  # Previous node
        # The previous node may have been pruned from the graph already.
        if prev_nid in self.graph.nodes:
            # Add temporal edge (sequence connection)
            if prev_nid not in self.temporal_connections:
                self.temporal_connections[prev_nid] = []
            self.temporal_connections[prev_nid].append(nid)
            # Also add to graph as temporal edge
            self.graph.add_edge(prev_nid, nid, weight=0.9, edge_type='temporal')
    # Link to the last 50 nodes if similarity > threshold
    nodes_to_check = list(self.graph.nodes)[-50:]
    # The new node was added above, so it can appear in this slice; drop it.
    if nid in nodes_to_check:
        nodes_to_check.remove(nid)
    for other_nid in nodes_to_check:
        if other_nid == nid:  # Redundant check but safe
            continue
        try:
            other_emb = self.graph.nodes[other_nid]['embedding']
            # Compares the raw new embedding against the lossy reconstruction
            # of the stored one.
            sim = self._cosine_similarity(
                embedding,
                self._decompress_embedding(other_emb)
            ).item()
            if sim > self.similarity_threshold:
                # NOTE(review): if a temporal edge to other_nid already exists,
                # this presumably overwrites its weight - confirm against
                # networkx add_edge semantics.
                self.graph.add_edge(nid, other_nid, weight=sim)
        except Exception as e:
            print(f"[ERROR] Error creating edge between {nid} and {other_nid}: {e}")
            continue
    self.id_counter += 1
    self.addition_count += 1
    # Prune only if:
    # 1. max_nodes is set AND exceeded, OR
    # 2. Memory pressure is high (GPU memory > threshold)
    should_prune = False
    prune_reason = ""
    # Any limit of 200 or less is treated as a leftover from an old saved
    # graph and discarded. This also discards a small limit the caller set
    # on purpose.
    if self.max_nodes is not None and self.max_nodes <= 200:
        print(f"[WARNING] max_nodes={self.max_nodes} detected - this may be from old saved graph. Current nodes: {self.graph.number_of_nodes()}")
        print(f"[WARNING] Forcing max_nodes=None to enable infinite growth...")
        self.max_nodes = None  # Force infinite growth if old limit detected
    if self.max_nodes is not None and self.graph.number_of_nodes() > self.max_nodes:
        should_prune = True
        prune_reason = f"exceeded max_nodes ({self.max_nodes})"
    elif self.max_nodes is None:  # Infinite growth mode - check memory pressure
        # Check GPU memory pressure
        if torch.cuda.is_available():
            try:
                # NOTE(review): total memory is read from device index 0 while
                # reserved/allocated are read from self.device.
                total_memory = torch.cuda.get_device_properties(0).total_memory
                reserved_memory = torch.cuda.memory_reserved(self.device)
                # allocated_memory is computed but not used below.
                allocated_memory = torch.cuda.memory_allocated(self.device)
                # Reserved memory as a fraction of total GPU memory
                memory_usage = reserved_memory / total_memory if total_memory > 0 else 0
                # Only prune if memory usage exceeds threshold AND we have enough nodes to prune
                # Don't prune if we have fewer than 1000 nodes (allow growth)
                if memory_usage > self.memory_pressure_threshold and self.graph.number_of_nodes() > 1000:
                    should_prune = True
                    prune_reason = f"high memory pressure ({memory_usage:.1%} > {self.memory_pressure_threshold:.1%})"
            except Exception as e:
                # If memory check fails, use node count as fallback (prune if > 50k nodes)
                if self.graph.number_of_nodes() > 50000:
                    should_prune = True
                    prune_reason = f"large graph size ({self.graph.number_of_nodes()} nodes)"
    if should_prune:
        evicted = self.prune(1)
        if evicted:
            print(f"[REMOVE] Evicted node {evicted} ({'Importance-based' if self.use_learnable_lru else 'LRU'}, {prune_reason}). Now {self.graph.number_of_nodes()} nodes remain.")
    # Save using dynamic interval + time-based trigger
    if self._should_save():
        dynamic_interval = self._dynamic_save_interval()
        print(f"[SAVE] Async saving MemoryGraph (after {dynamic_interval} adds or {self.save_interval_seconds}s)...")
        try:
            # NOTE(review): presumably create_managed_thread also starts the
            # thread; thread_manager is not visible here - confirm.
            from thread_manager import create_managed_thread
            create_managed_thread(target=self.save_graph, name="MemoryGraphSave")
        except ImportError:
            # Fallback to synchronous save if thread_manager not available
            self.save_graph()
        # Reset both save triggers.
        self.addition_count = 0
        self.last_save_time = time.time()
    print(f"[PACKAGE] MemoryGraph size: {self.graph.number_of_nodes()}")
| # alias for backward compatibility | |
| add_node = add_experience | |
def prune(self, k: int = 1):
    """
    Evict up to ``k`` nodes from the memory graph.

    Two strategies are supported:

    * ``use_learnable_lru`` enabled: every node gets an importance score and
      the ``k`` lowest-scoring nodes are evicted. When ``forgetation_model``
      is set, the score is 70% model prediction and 30% stored importance;
      if the model is missing or fails, a rule-based score is used (stored
      importance + access boost + connectivity boost, scaled by a time
      decay). Every eviction is appended to ``forgetation_history``.
    * otherwise: plain LRU, popping ids from the left of ``node_queue``.

    After eviction the forgetting model is retrained when a model exists and
    at least 10 history records are available.

    Args:
        k: Number of nodes to evict. Values <= 0 evict nothing.

    Returns:
        The id of the last node removed, or ``None`` if nothing was removed.
    """
    # A negative k used to slice the sorted candidates as "all but the last k",
    # which wiped almost the whole graph; treat non-positive k as a no-op.
    if k <= 0 or not self.graph.nodes:
        return None

    removed = []

    def drop_bookkeeping(nid):
        """Forget the per-node statistics kept outside the graph."""
        self.access_counts.pop(nid, None)
        self.last_accessed.pop(nid, None)
        self.importance_scores.pop(nid, None)

    if self.use_learnable_lru:
        now = time.time()
        node_ids = list(self.graph.nodes)
        # Degrees do not change while scoring, so compute them (and the
        # maximum used for normalisation) once instead of once per node.
        degrees = {nid: self.graph.degree(nid) for nid in node_ids}
        max_degree = max(max(degrees.values(), default=0), 1)
        type_codes = {'episodic': 0.0, 'semantic': 0.33, 'procedural': 0.66, 'working': 1.0}

        def rule_based_importance(nid, access_count, idle_seconds, norm_connectivity):
            """Score a node without the learnable model."""
            base_importance = self.importance_scores.get(nid, 0.5)
            access_boost = min(access_count * self.access_boost, 1.0)
            connectivity_boost = norm_connectivity * self.connectivity_weight
            time_decay = self.importance_decay ** (idle_seconds / 3600)
            return (base_importance + access_boost + connectivity_boost) * time_decay

        node_importances = {}
        for nid in node_ids:
            access_count = self.access_counts.get(nid, 0)
            idle_seconds = now - self.last_accessed.get(nid, now)
            norm_connectivity = degrees[nid] / max_degree

            importance = None
            if self.forgetation_model is not None:
                try:
                    memory_type = self.node_types.get(nid, 'episodic')
                    curve = self.forgetting_curves.get(nid, {})
                    features = torch.tensor([
                        min(access_count / 100.0, 1.0),                # capped at 100 accesses
                        min(idle_seconds / (365 * 24 * 3600), 1.0),    # normalised to one year
                        norm_connectivity,
                        type_codes.get(memory_type, 0.0),
                        curve.get('consolidation_level', 0.0),
                    ], device=self.device, dtype=torch.float32).unsqueeze(0)
                    self.forgetation_model.eval()
                    with torch.no_grad():
                        predicted = self.forgetation_model(features).item()
                    importance = 0.7 * predicted + 0.3 * self.importance_scores.get(nid, 0.5)
                except Exception:
                    # Best effort: any model failure falls back to the rules.
                    importance = None
            if importance is None:
                importance = rule_based_importance(nid, access_count, idle_seconds, norm_connectivity)
            node_importances[nid] = importance

        # Lowest importance first; the sort is stable so ties keep graph order.
        ranked = sorted(node_importances.items(), key=lambda item: item[1])
        for nid, _ in ranked[:k]:
            if nid not in self.graph:
                continue
            # Record what is being forgotten so the model can learn from it.
            self.forgetation_history.append({
                'node_id': nid,
                'importance': node_importances[nid],
                'access_count': self.access_counts.get(nid, 0),
                'connectivity': self.graph.degree(nid),
                'memory_type': self.node_types.get(nid, 'episodic'),
                'timestamp': now,
            })
            self.graph.remove_node(nid)
            drop_bookkeeping(nid)
            try:
                self.node_queue.remove(nid)
            except ValueError:
                pass  # node was not tracked in the LRU queue
            removed.append(nid)
    else:
        # Simple LRU: remove the oldest queued nodes.
        for _ in range(k):
            if not self.node_queue:
                break
            old = self.node_queue.popleft()
            if old in self.graph:
                self.graph.remove_node(old)
                drop_bookkeeping(old)
                removed.append(old)
            else:
                print(f"Node {old} not found in graph during prune, skipping.")

    # Retrain once enough forgetting history has accumulated.
    if len(self.forgetation_history) >= 10 and self.forgetation_model is not None:
        self._train_forgetation_model()
    return removed[-1] if removed else None
def _train_forgetation_model(self):
    """
    Run a short training pass of the learnable forgetting model.

    Uses up to the 50 most recent ``forgetation_history`` records. A record
    is labelled as a mistake (target 1.0) when:

    * its node was explicitly reported through
      ``learn_from_forgetting_mistake`` and is still queued in
      ``forgetation_buffer``, or
    * the node is no longer tracked in ``access_counts`` and its importance
      at eviction time was above 0.3 (a proxy, since later accesses of a
      removed node are not tracked).

    All other records get target 0.0. Does nothing when no model is set or
    fewer than 10 history records exist. Training errors are logged at debug
    level and never propagate; the model is always returned to eval mode.

    NOTE(review): the features built here (connectivity / 10, time since
    eviction, consolidation fixed at 0) are not the ones ``prune`` feeds the
    model at inference time; the history records do not carry enough
    information to align them.
    """
    if self.forgetation_model is None or len(self.forgetation_history) < 10:
        return

    import logging

    type_codes = {'episodic': 0.0, 'semantic': 0.33, 'procedural': 0.66, 'working': 1.0}
    try:
        # Nodes the caller has confirmed were wrongly forgotten. Previously
        # this buffer was filled and cleared without ever being consulted.
        confirmed_mistakes = {
            entry['record']['node_id']
            for entry in (getattr(self, 'forgetation_buffer', None) or ())
            if entry.get('was_mistake')
        }

        now = time.time()
        training_data = []
        # list(...) so the slice also works if the history is a deque.
        for record in list(self.forgetation_history)[-50:]:
            nid = record['node_id']
            if nid in confirmed_mistakes:
                was_mistake = True
            elif nid in self.access_counts:
                # Still tracked, so it is not treated as a wrong eviction.
                was_mistake = False
            else:
                was_mistake = record['importance'] > 0.3

            # Features: [access_count, elapsed time, connectivity, memory_type, consolidation]
            features = torch.tensor([
                min(record['access_count'] / 100.0, 1.0),
                min((now - record['timestamp']) / (365 * 24 * 3600), 1.0),
                min(record['connectivity'] / 10.0, 1.0),
                type_codes.get(record['memory_type'], 0.0),
                0.0,  # consolidation is not recorded at eviction time
            ], device=self.device, dtype=torch.float32)
            target = torch.tensor([1.0 if was_mistake else 0.0], device=self.device, dtype=torch.float32)
            training_data.append((features, target))

        criterion = torch.nn.MSELoss()
        total_loss = 0.0
        self.forgetation_model.train()
        try:
            for _ in range(3):  # quick training: three passes over the samples
                total_loss = 0.0
                for features, target in training_data:
                    self.forgetation_optimizer.zero_grad()
                    prediction = self.forgetation_model(features.unsqueeze(0))
                    loss = criterion(prediction, target.unsqueeze(0))
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(self.forgetation_model.parameters(), max_norm=1.0)
                    self.forgetation_optimizer.step()
                    total_loss += loss.item()
        finally:
            # Never leave the model in train mode, even if a step failed;
            # prune() relies on eval-mode predictions.
            self.forgetation_model.eval()

        if self.addition_count % 100 == 0:
            avg_loss = total_loss / len(training_data)
            print(f"[FORGETATIONS] Trained model (avg loss: {avg_loss:.4f}, samples: {len(training_data)})")
    except Exception as exc:
        # Training is best effort and must not break the memory system,
        # but the failure should at least be discoverable.
        logging.getLogger(__name__).debug("Forgetation model training failed: %s", exc)
def learn_from_forgetting_mistake(self, forgotten_node_id: int, actual_importance: float):
    """
    Report that a previously forgotten node turned out to be needed.

    The most recent ``forgetation_history`` record for the node, if any, is
    queued in ``forgetation_buffer`` flagged as a mistake. Once the buffer
    holds 10 or more entries the forgetting model is retrained and the
    buffer is emptied. Does nothing when no forgetting model is configured.

    Args:
        forgotten_node_id: ID of the node that was forgotten but shouldn't have been.
        actual_importance: The actual importance (higher = bigger mistake).
    """
    if self.forgetation_model is None:
        return

    # Newest record wins: scan the history from the end.
    latest_record = next(
        (entry for entry in reversed(self.forgetation_history)
         if entry['node_id'] == forgotten_node_id),
        None,
    )
    if latest_record is not None:
        mistake = {
            'record': latest_record,
            'was_mistake': True,
            'actual_importance': actual_importance,
        }
        self.forgetation_buffer.append(mistake)

    # Train immediately once the buffer is full.
    if len(self.forgetation_buffer) < 10:
        return
    self._train_forgetation_model()
    self.forgetation_buffer.clear()
def _detect_memory_type(self, metadata: dict, embedding: torch.Tensor) -> str:
    """
    Classify a memory as procedural, semantic, working or episodic.

    The decision uses keyword matching on ``metadata['original_text']`` and
    the explicit ``metadata['type']`` hint, checked in this order:
    procedural, semantic, working, episodic.

    Note that a missing ``stage`` defaults to 0, which counts as working
    memory, so ``'episodic'`` is only returned when ``stage`` is set to a
    non-zero value and nothing else matches.

    Args:
        metadata: Node metadata; ``original_text``, ``type`` and ``stage``
            are read if present. ``None`` or non-string text/type values are
            tolerated.
        embedding: Unused by the current heuristics.

    Returns:
        One of ``'procedural'``, ``'semantic'``, ``'working'``, ``'episodic'``.
    """
    # ``or ''`` + str(): a stored None (or other non-string) used to raise
    # AttributeError on .lower().
    text = str(metadata.get('original_text') or '').lower()
    metadata_type = str(metadata.get('type') or '').lower()

    # Procedural: action words, steps, instructions.
    procedural_keywords = ('step', 'how to', 'procedure', 'process', 'method', 'algorithm', 'instruction')
    if 'procedural' in metadata_type or any(kw in text for kw in procedural_keywords):
        return 'procedural'

    # Semantic: facts, concepts, definitions.
    semantic_keywords = ('definition', 'concept', 'fact', 'knowledge', 'meaning', 'is a', 'refers to')
    if 'semantic' in metadata_type or any(kw in text for kw in semantic_keywords):
        return 'semantic'

    # Working: stage 0 (also the default when no stage is given) or explicit hint.
    if metadata.get('stage', 0) == 0 or 'working' in metadata_type:
        return 'working'

    # Episodic: everything else.
    return 'episodic'
def _update_forgetting_curve(self, nid: int):
    """
    Refresh the forgetting-curve state of node ``nid`` after an access.

    Retention follows an Ebbinghaus-style curve ``R = exp(-t / S)`` where
    ``t`` is the time since the curve's ``last_accessed`` and the strength
    ``S`` is one hour scaled by ``1 + consolidation_level``. Retention is
    computed with the consolidation level *before* this update. If the
    node had been accessed before (``access_count > 0``), consolidation then
    moves 10% of the way towards 1.0. Finally the curve's timestamp and
    access count are updated and the node's importance is set to
    ``retention * (0.5 + 0.5 * consolidation)``, clamped to [0.1, 1.0].

    Nodes without an entry in ``forgetting_curves`` are ignored.

    NOTE(review): the curve's ``decay_rate`` is not used by this formula.
    A previously computed-but-unused ``effective_decay`` local was removed,
    so the method no longer requires that key to exist.
    """
    if nid not in self.forgetting_curves:
        return

    curve = self.forgetting_curves[nid]
    current_time = time.time()
    time_since_access = current_time - curve['last_accessed']
    consolidation = curve['consolidation_level']

    # More consolidated memories decay slower (time constant on an hours scale).
    retention = float(np.exp(-time_since_access / (3600 * (1.0 + consolidation))))

    # Repeated access strengthens consolidation with diminishing returns.
    if curve['access_count'] > 0:
        consolidation = min(1.0, consolidation + 0.1 * (1.0 - consolidation))

    curve['consolidation_level'] = consolidation
    curve['last_accessed'] = current_time
    curve['access_count'] += 1

    # Importance blends retention with the (updated) consolidation level.
    self.importance_scores[nid] = max(0.1, min(1.0, retention * (0.5 + consolidation * 0.5)))
def consolidate_memories(self, threshold: float = 0.3):
    """
    Run one consolidation sweep over every node that has a forgetting curve.

    Each such node first has its forgetting curve refreshed, then its
    importance is nudged: scores above 0.7 are multiplied by 1.05 (capped at
    1.0), scores below ``threshold`` are multiplied by 0.95 (floored at
    0.05). Nothing is deleted. A summary line is printed when any score was
    changed. The sweep is skipped entirely when ``enable_consolidation`` is
    off.

    Args:
        threshold: Importance below which a memory is weakened.
    """
    if not self.enable_consolidation:
        return

    strengthened = 0
    faded = 0
    # Iterate over a snapshot of the node ids.
    for node_id in list(self.graph.nodes):
        if node_id in self.forgetting_curves:
            self._update_forgetting_curve(node_id)
            score = self.importance_scores.get(node_id, 0.5)
            if score > 0.7:
                # Strengthen well-consolidated memories.
                self.importance_scores[node_id] = min(1.0, score * 1.05)
                strengthened += 1
            elif score < threshold:
                # Weaken forgotten memories, but keep them recallable.
                self.importance_scores[node_id] = max(0.05, score * 0.95)
                faded += 1

    if strengthened or faded:
        print(f"[CONSOLIDATE] Strengthened {strengthened} memories, weakened {faded} forgotten memories")
def _cosine_similarity(self, emb1: torch.Tensor, emb2: torch.Tensor):
    """
    Cosine similarity between two embeddings of possibly different sizes.

    Both tensors are moved to ``self.device``, cast to float32 and flattened.
    When the lengths differ, the longer vector is truncated to the length of
    the shorter one (no padding is performed).

    Args:
        emb1: First embedding, any shape.
        emb2: Second embedding, any shape.

    Returns:
        A 0-dimensional tensor with the similarity, or a 0.0 tensor when
        either embedding is empty.
    """
    # reshape() instead of view(): view(-1) raises on non-contiguous input
    # (e.g. a transposed or sliced tensor).
    v1 = emb1.to(self.device).to(torch.float32).reshape(-1)
    v2 = emb2.to(self.device).to(torch.float32).reshape(-1)

    common = min(v1.size(0), v2.size(0))
    if common == 0:
        return torch.tensor(0.0, device=self.device)

    # Slicing to the common length is a no-op for the shorter vector.
    return F.cosine_similarity(
        v1[:common].unsqueeze(0),
        v2[:common].unsqueeze(0)
    ).squeeze()
def retrieve_experience(self, query_embedding: torch.Tensor, top_k: int = 1) -> List[dict]:
    """
    Retrieve the ``top_k`` stored experiences most similar to an embedding.

    Every node's embedding is decompressed and compared to the query with
    ``_cosine_similarity``. For each returned node, when
    ``use_learnable_lru`` is enabled, the importance score is boosted
    (doubled boost if the node id appears in ``forgetation_history``, which
    is also reported via ``learn_from_forgetting_mistake``), access count
    and last-access time are updated, and the forgetting curve is refreshed
    if consolidation is enabled. Nodes that fail at any stage are logged and
    skipped.

    Args:
        query_embedding (torch.Tensor): The embedding to query with.
        top_k (int): Number of top results to return; values <= 0 return
            an empty list.

    Returns:
        List[dict]: One ``{'embedding': list, 'metadata': dict}`` per hit,
        best match first. ``metadata`` is a copy of the node metadata with
        ``similarity_score`` and ``node_id`` added.
    """
    # A negative top_k would otherwise slice to "all but the last |top_k|".
    if top_k <= 0:
        return []

    scored = []
    # Snapshot of ids so the graph may change while we iterate.
    for nid in list(self.graph.nodes):
        try:
            stored = self.graph.nodes[nid].get('embedding')
            if stored is None:
                print(f"Warning: Node {nid} has no embedding data, skipping.")
                continue
            similarity = self._cosine_similarity(
                query_embedding,
                self._decompress_embedding(stored)
            ).item()
            scored.append((nid, similarity))
        except Exception as e:
            print(f"[ERROR] Error retrieving experience for node {nid}: {e}")
            continue

    scored.sort(key=lambda pair: pair[1], reverse=True)

    results = []
    for nid, sim_score in scored[:top_k]:
        try:
            # Looked up inside the try: the node may have been evicted since scoring.
            node_data = self.graph.nodes[nid]

            if self.use_learnable_lru:
                # A hit on an id present in the forgetting history means an
                # earlier eviction of it was a mistake.
                was_forgotten = any(
                    record['node_id'] == nid for record in self.forgetation_history
                )
                if was_forgotten:
                    # Similarity is used as a proxy for the actual importance.
                    self.learn_from_forgetting_mistake(nid, sim_score)
                    if self.addition_count % 50 == 0:
                        print(f"[FORGETATIONS] Learned from mistake: node {nid} was forgotten but is important (sim: {sim_score:.3f})")

                # Boost importance on access; bigger boost after a mistake.
                boost = self.access_boost * (2.0 if was_forgotten else 1.0)
                current_importance = self.importance_scores.get(nid, 0.5)
                self.importance_scores[nid] = min(current_importance + boost, 1.0)

                # Update access statistics.
                self.access_counts[nid] = self.access_counts.get(nid, 0) + 1
                self.last_accessed[nid] = time.time()

                # Refresh the forgetting curve on access (memory consolidation).
                if self.enable_consolidation and nid in self.forgetting_curves:
                    self._update_forgetting_curve(nid)

            emb = self._decompress_embedding(node_data['embedding']).cpu().tolist()
            metadata = dict(node_data.get('metadata', {}))  # copy; don't mutate the node
            metadata['similarity_score'] = sim_score
            metadata['node_id'] = nid
            results.append({'embedding': emb, 'metadata': metadata})
        except Exception as e:
            print(f"Error processing retrieved node {nid} for results: {e}")
            continue
    return results
def retrieve(self, query: str, top_k: int = 3) -> str:
    """
    Answer a text query from memory, using RAG generation when available.

    The query is embedded (shared model first, ``shared_adapter`` as the
    fallback), the ``top_k`` most similar experiences are fetched with
    ``retrieve_experience`` and, together with documents from the RAG
    generator's own retriever, passed to ``rag_generator.generate_response``.

    If generation fails with a CUDA-related error, ``_rag_cuda_error`` is
    set; that call and every later call skip generation and return the raw
    memory hits instead.

    Args:
        query (str): The text string to query against memory.
        top_k (int): Number of memory hits / documents used as context.

    Returns:
        The RAG-generated answer on the normal path. NOTE: despite the
        ``-> str`` annotation the method also returns ``None`` (no generator,
        no usable encoder, or no hits on the fallback path) and, on the
        CUDA-fallback path, a list of
        ``{'text', 'embedding', 'metadata'}`` dicts.

    Raises:
        RuntimeError, AssertionError: re-raised from generation when the
            error message is not CUDA-related.
    """
    import logging
    logger = logging.getLogger(__name__)

    def direct_results(experiences):
        """Format raw memory hits for callers when RAG generation is unavailable."""
        hits = []
        for exp in experiences:
            metadata = exp.get('metadata', {})
            text = metadata.get('original_text', '')
            if text:
                hits.append({'text': text, 'embedding': exp.get('embedding'), 'metadata': metadata})
        return hits if hits else None

    def mean_pooled(token_embeddings):
        """Mean-pool token embeddings over the sequence axis and L2-normalise."""
        return F.normalize(token_embeddings.mean(dim=1), p=2, dim=1)

    def is_cuda_failure(exc):
        """True when the error text indicates a CUDA / device-side failure."""
        message = str(exc).lower()
        return any(keyword in message for keyword in ("cuda", "assert", "device-side"))

    def to_memory_device(tensor):
        """Best-effort move onto this graph's device; keep as-is if that fails."""
        if isinstance(tensor, torch.Tensor) and hasattr(self, 'device'):
            try:
                if tensor.device != self.device:
                    return tensor.to(self.device)
            except Exception:
                pass  # keep on original device if the move fails
        return tensor

    # ------------------------------------------------------------------
    # Fast path: RAG previously hit a CUDA error -> return direct results.
    # ------------------------------------------------------------------
    if getattr(self, '_rag_cuda_error', False):
        try:
            q_emb = None
            if self.shared_model is not None:
                try:
                    if hasattr(self.shared_model, 'get_tokenizer'):
                        tokenizer = self.shared_model.get_tokenizer()
                    else:
                        tokenizer = getattr(self, 'shared_tokenizer', None)
                    if tokenizer is not None:
                        inputs = tokenizer(query, return_tensors="pt", padding=True, truncation=True, max_length=512)
                        if hasattr(self.shared_model, 'get_model'):
                            model = self.shared_model.get_model()
                        else:
                            model = self.shared_model
                        device = next(model.parameters()).device
                        inputs = {key: value.to(device) for key, value in inputs.items()}
                        with torch.no_grad():
                            if hasattr(model, 'get_input_embeddings'):
                                q_emb = mean_pooled(model.get_input_embeddings()(inputs['input_ids']))
                except Exception as exc:
                    # Fall through to the adapter below.
                    logger.debug("Shared model encoding failed: %s", exc)
                    q_emb = None
            if q_emb is None:
                if self.shared_adapter is None:
                    return None
                q_emb = self.shared_adapter.encode_text(query, max_length=512, use_hidden=False)
            if q_emb is None:
                return None
            q_emb = to_memory_device(q_emb)
            return direct_results(self.retrieve_experience(q_emb, top_k))
        except Exception:
            # Deliberate best effort: the fallback path never raises.
            return None

    if self.rag_generator is None:
        # RAG is optional; None signals "not available" to the caller.
        return None

    # ------------------------------------------------------------------
    # Encode the query with the shared model (no extra model in memory).
    # ------------------------------------------------------------------
    q_emb = None
    if self.shared_model is not None or (SHARED_MODEL_AVAILABLE and get_shared_model is not None):
        try:
            shared_model = self.shared_model
            if shared_model is None and SHARED_MODEL_AVAILABLE and get_shared_model is not None:
                shared_model = get_shared_model()
            if shared_model is not None:
                # Tokenizer: first applicable source wins.
                tokenizer = None
                if hasattr(shared_model, 'get_tokenizer'):
                    tokenizer = shared_model.get_tokenizer()
                elif hasattr(shared_model, 'tokenizer'):
                    tokenizer = shared_model.tokenizer
                elif hasattr(self, 'shared_tokenizer') and self.shared_tokenizer is not None:
                    tokenizer = self.shared_tokenizer
                elif SHARED_MODEL_AVAILABLE and get_shared_tokenizer is not None:
                    try:
                        tokenizer = get_shared_tokenizer()
                    except Exception:
                        pass
                elif self.shared_adapter is not None:
                    tokenizer = self.shared_adapter.get_tokenizer()

                # Underlying model: unwrap common wrapper attributes.
                if hasattr(shared_model, 'get_model'):
                    model = shared_model.get_model()
                elif hasattr(shared_model, 'model'):
                    model = shared_model.model
                elif hasattr(shared_model, 'base_model'):
                    model = shared_model.base_model
                else:
                    model = shared_model

                if tokenizer is not None and model is not None:
                    # Short, unpadded sequences keep encoding fast.
                    inputs = tokenizer(
                        query,
                        return_tensors="pt",
                        padding=False,
                        truncation=True,
                        max_length=256
                    )
                    fallback_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
                    try:
                        if hasattr(model, 'parameters'):
                            device = next(model.parameters()).device
                        elif hasattr(model, 'device'):
                            device = model.device
                        else:
                            device = fallback_device
                    except Exception:
                        device = fallback_device
                    inputs = {key: value.to(device) for key, value in inputs.items()}

                    with torch.no_grad():
                        try:
                            # Prefer the embedding layer (cheapest); a full
                            # forward pass is the last resort.
                            if hasattr(model, 'get_input_embeddings'):
                                embed = model.get_input_embeddings()
                            elif hasattr(model, 'embed_tokens'):
                                embed = model.embed_tokens
                            elif hasattr(model, 'base_model') and hasattr(model.base_model, 'embed_tokens'):
                                embed = model.base_model.embed_tokens  # PEFT/LoRA wrapped
                            elif hasattr(model, 'model') and hasattr(model.model, 'embed_tokens'):
                                embed = model.model.embed_tokens  # nested structure
                            else:
                                embed = None

                            if embed is not None:
                                q_emb = mean_pooled(embed(inputs['input_ids']))
                            else:
                                outputs = model(**inputs, output_hidden_states=True, use_cache=False)
                                if hasattr(outputs, 'hidden_states') and outputs.hidden_states:
                                    q_emb = mean_pooled(outputs.hidden_states[-1])
                                elif hasattr(outputs, 'last_hidden_state'):
                                    q_emb = mean_pooled(outputs.last_hidden_state)
                                else:
                                    raise ValueError("Could not extract embeddings from model output")
                            q_emb = to_memory_device(q_emb)
                        except Exception as e:
                            # Debug level only: too verbose to print during training.
                            logger.debug(f"Embedding extraction failed: {e}")
                            q_emb = None
                else:
                    if tokenizer is None:
                        logger.debug("Tokenizer not available for shared model encoding")
                    if model is None:
                        logger.debug("Model not available for shared model encoding")
                    q_emb = None
        except Exception as e:
            logger.debug(f"Shared model encoding failed: {e}")
            q_emb = None

    # Fallback encoder.
    if q_emb is None:
        if self.shared_adapter is None:
            return None
        q_emb = self.shared_adapter.encode_text(query, max_length=512, use_hidden=False)
    if q_emb is None:
        return None

    # ------------------------------------------------------------------
    # Build the context: memory hits first, then the generator's documents.
    # ------------------------------------------------------------------
    retrieved_experiences = self.retrieve_experience(q_emb, top_k)

    scored_contexts = []
    for exp in retrieved_experiences:
        metadata = exp.get('metadata', {})
        cleaned = self._sanitize_text(metadata.get('original_text', ''), max_chars=2000)
        if self._is_low_quality_chunk(cleaned):
            continue
        sim = metadata.get('similarity_score', 0.0)
        nid = metadata.get('node_id')
        importance = self.importance_scores.get(nid, 0.5) if nid is not None else 0.5
        last_accessed = self.last_accessed.get(nid, time.time()) if nid is not None else time.time()
        scored_contexts.append((self._score_context(sim, importance, last_accessed), cleaned))
    scored_contexts.sort(key=lambda item: item[0], reverse=True)
    context_texts = [text for _, text in scored_contexts[:top_k]]

    wikitext_context = [
        self._sanitize_text(doc, max_chars=1500)
        for doc in self.rag_generator.retrieve_documents(query, top_k=top_k)
    ]
    wikitext_context = [doc for doc in wikitext_context if not self._is_low_quality_chunk(doc)]

    # dict.fromkeys de-duplicates while preserving order (memory hits first).
    combined_context = list(dict.fromkeys(context_texts + wikitext_context))

    # Enforce an overall character budget; a final chunk is only included
    # truncated if more than 100 characters of budget remain.
    max_total_chars = 3500
    trimmed_context = []
    total_chars = 0
    for chunk in combined_context:
        if total_chars + len(chunk) > max_total_chars:
            remaining = max_total_chars - total_chars
            if remaining > 100:
                trimmed_context.append(chunk[:remaining].rstrip() + "...")
            break
        trimmed_context.append(chunk)
        total_chars += len(chunk)
    combined_context = trimmed_context

    # ------------------------------------------------------------------
    # Generate; on CUDA failure remember it and return the raw hits.
    # ------------------------------------------------------------------
    has_context = bool(combined_context)
    if has_context:
        generation_kwargs = {'retrieved_context': combined_context}
    else:
        print("[NOTICE] No context available from either MemoryGraph or Wikitext. Generating general response.")
        generation_kwargs = {'retrieved_context': [], 'max_length': 100}

    try:
        return self.rag_generator.generate_response(query=query, **generation_kwargs)
    except (RuntimeError, torch.cuda.CudaError, AssertionError) as e:
        if not is_cuda_failure(e):
            raise
        self._rag_cuda_error = True
        if has_context:
            print(f"[WARN] RAG generation failed due to CUDA error, returning direct memory results")
        return direct_results(retrieved_experiences)
def propagate(self, input_embedding: torch.Tensor):
    """
    Weight an input by its average similarity to recently stored memories.

    For each of the (at most) 100 most recently listed graph nodes, the
    cosine similarity between every input position and the node embedding
    is computed; the result is the input scaled by the sum of those
    similarities divided by the number of nodes considered (nodes that fail
    contribute zero but still count in the divisor).

    Node embeddings are flattened, moved to the input's device/dtype, and
    zero-padded or truncated to the input's feature size ``D``.

    Args:
        input_embedding: Tensor of shape ``[B, L, D]``. ``[B, D]`` is
            treated as ``[B, 1, D]`` and ``[B, L1, L2, D]`` is flattened to
            ``[B, L1 * L2, D]``.

    Returns:
        Tensor of shape ``[B, L, D]`` (all zeros when the graph is empty).

    Raises:
        ValueError: If the input is not 2-, 3- or 4-dimensional.
    """
    try:
        B, L, D = input_embedding.shape
    except ValueError as e:
        print(f"[INSPECT] Failed to unpack tensor shape {input_embedding.shape}: {e}")
        if input_embedding.dim() == 2:
            # [batch, features] -> add a sequence dimension.
            B, D = input_embedding.shape
            L = 1
            input_embedding = input_embedding.unsqueeze(1)  # [B, 1, D]
        elif input_embedding.dim() == 4:
            # Merge the two middle dimensions. reshape() (not view()) so
            # non-contiguous inputs work too.
            original_shape = input_embedding.shape
            B, L1, L2, D = original_shape
            L = L1 * L2
            input_embedding = input_embedding.reshape(B, L, D)
            print(f"[INSPECT] Reshaped 4D tensor {original_shape} to 3D: [{B}, {L}, {D}]")
        else:
            raise ValueError(f"Unsupported tensor shape for propagate: {input_embedding.shape}")

    output = torch.zeros_like(input_embedding)
    norm_in = input_embedding / (input_embedding.norm(dim=-1, keepdim=True) + 1e-8)

    # Limit the work to the most recently listed nodes.
    nodes_to_propagate = list(self.graph.nodes)[-100:]
    if not nodes_to_propagate:
        return output  # zero tensor if there are no nodes

    for nid in nodes_to_propagate:
        try:
            node_emb = self._decompress_embedding(self.graph.nodes[nid]['embedding'])
            # Flatten first so stored shapes like [1, D] or 0-d scalars are
            # padded/truncated along the feature axis, and align device and
            # dtype with the input so the similarity can be computed.
            node_emb = node_emb.reshape(-1).to(
                device=input_embedding.device, dtype=input_embedding.dtype
            )
            width = node_emb.size(0)
            if width < D:
                node_emb = F.pad(node_emb, (0, D - width), 'constant', 0)
            elif width > D:
                node_emb = node_emb[:D]

            node_norm = node_emb / (node_emb.norm() + 1e-8)
            # norm_in is (B, L, D); node_norm broadcasts as (1, 1, D) -> sim is (B, L).
            sim = F.cosine_similarity(norm_in, node_norm.view(1, 1, -1), dim=-1)
            output += sim.unsqueeze(-1) * input_embedding
        except Exception as e:
            print(f"[ERROR] Error during propagation for node {nid}: {e}")
            continue

    return output / max(len(nodes_to_propagate), 1)
def consolidate(self, remove_isolated: bool = False):
    """Prune weak edges and, optionally, the nodes left without any edge.

    An edge is dropped when it has no ``'weight'`` attribute, when its weight
    is ``None``, or when its weight is below 0.01.

    Args:
        remove_isolated: When true, nodes that have no edges after pruning
            are removed from the graph, from ``node_queue`` and from the
            per-node ``access_counts``, ``last_accessed`` and
            ``importance_scores`` dictionaries.
    """
    weak_edges = [
        (u, v)
        for u, v, data in self.graph.edges(data=True)
        if data.get('weight') is None or data['weight'] < 0.01
    ]
    self.graph.remove_edges_from(weak_edges)

    if not remove_isolated:
        return

    # list() drains the generator before the graph is mutated, so a defensive
    # copy of the whole graph is not needed.
    isolated = list(nx.isolates(self.graph))
    self.graph.remove_nodes_from(isolated)

    # Build the lookup set once rather than once per queue element.
    isolated_ids = set(isolated)
    self.node_queue = deque([nid for nid in self.node_queue if nid not in isolated_ids])

    # importance_scores is keyed by node id like the other two; guard with
    # getattr in case the attribute has not been initialised.
    importance_scores = getattr(self, 'importance_scores', None)
    for nid in isolated:
        self.access_counts.pop(nid, None)
        self.last_accessed.pop(nid, None)
        if isinstance(importance_scores, dict):
            importance_scores.pop(nid, None)
    print(f"[REMOVE] Removed {len(isolated)} isolated nodes.")
def save_graph(self):
    """Write the graph and its bookkeeping to ``self.graph_file`` as gzip JSON.

    ``self.graph_file`` is normalised to end in ``.gz`` before anything is
    derived from it. Node embeddings are converted to
    ``{'data': [...], 'compressed': bool}`` dictionaries and NumPy values in
    node metadata are converted to plain Python types; both conversions are
    done on copies so the live graph is not modified. When a VAE compressor
    is present its weights are written to a sibling ``*_vae_weights.pt``
    file. After a successful save, a leftover uncompressed ``.json`` file
    next to the archive is deleted.

    Exceptions are reported with a ``[FAIL]`` message rather than raised. The
    archive is written to a temporary file and swapped in only when complete,
    so a failed save leaves the previous archive untouched.
    """
    tmp_file = None
    try:
        # Normalise the suffix first: the VAE weights path below is derived
        # from graph_file and must never resolve to the archive itself.
        if not self.graph_file.endswith('.gz'):
            self.graph_file = self.graph_file + '.gz'

        # Prepare node attributes for JSON serialization.
        serializable_nodes = []
        for nid, data in self.graph.nodes(data=True):
            node_copy = data.copy()
            current_embedding = node_copy.get('embedding')
            if isinstance(current_embedding, torch.Tensor):
                node_copy['embedding'] = {'data': current_embedding.cpu().tolist(), 'compressed': False}
            elif isinstance(current_embedding, list):  # Old list format
                node_copy['embedding'] = {'data': current_embedding, 'compressed': False}
            elif not isinstance(current_embedding, dict) or 'data' not in current_embedding:
                print(f"Warning: Node {nid} has unexpected embedding format during save. Attempting conversion.")
                if isinstance(current_embedding, dict) and current_embedding:
                    # Non-empty dict without 'data': best guess is its first value.
                    first_value = next(iter(current_embedding.values()))
                    node_copy['embedding'] = {'data': first_value, 'compressed': False}
                else:
                    node_copy['embedding'] = {'data': [], 'compressed': False}

            # data.copy() is shallow, so build a fresh metadata dict instead
            # of rewriting the one still attached to the live graph.
            metadata = node_copy.get('metadata')
            if isinstance(metadata, dict):
                cleaned = {}
                for key, value in metadata.items():
                    if isinstance(value, np.ndarray):
                        cleaned[key] = value.tolist()
                    elif isinstance(value, np.generic):  # e.g. np.float32 scalars
                        cleaned[key] = value.item()
                    else:
                        cleaned[key] = value
                node_copy['metadata'] = cleaned
            serializable_nodes.append((nid, node_copy))

        # Serialize from a fresh graph holding only the converted attributes.
        serializable_graph = nx.Graph()
        serializable_graph.add_nodes_from(serializable_nodes)
        serializable_graph.add_edges_from(self.graph.edges(data=True))
        data = json_graph.node_link_data(serializable_graph, edges="links")

        pca = self.pca
        pca_on_vae = self.pca_on_vae
        compression_info = {
            'original_dim': self.original_dim,
            'compressed_dim': getattr(pca, 'n_components_', None) if pca else None,
            'quantization_bits': self.quantization_bits,
            'pca_components': pca.components_.tolist() if pca and hasattr(pca, 'components_') else None,
            'pca_mean': pca.mean_.tolist() if pca and hasattr(pca, 'mean_') else None,
            'use_vae_compression': self.use_vae_compression,
            'use_combined_compression': self.use_combined_compression,
            'vae_latent_dim': self.vae_latent_dim if self.use_vae_compression else None,
            'max_nodes': self.max_nodes,
            'pca_on_vae_components': pca_on_vae.components_.tolist() if pca_on_vae and hasattr(pca_on_vae, 'components_') else None,
            'pca_on_vae_mean': pca_on_vae.mean_.tolist() if pca_on_vae and hasattr(pca_on_vae, 'mean_') else None,
            'pca_on_vae_dim': getattr(pca_on_vae, 'n_components_', None) if pca_on_vae else None,
        }

        # VAE weights go to a separate file next to the archive.
        if self.vae_compressor is not None:
            try:
                # Strip the suffix explicitly; for "x.json.gz" this yields the
                # same "x_vae_weights.pt" name as before, and for any other
                # name it can no longer collide with the archive path.
                stem = self.graph_file[:-len('.gz')]
                if stem.endswith('.json'):
                    stem = stem[:-len('.json')]
                vae_weights_file = stem + '_vae_weights.pt'
                self.vae_compressor.save_weights(vae_weights_file)
                compression_info['vae_weights_file'] = vae_weights_file
                compression_info['vae_enable_training'] = self.enable_vae_training
                compression_info['vae_learning_rate'] = self.vae_learning_rate
                print(f"[VAE] Saved VAE weights to {vae_weights_file}")
            except Exception as e:
                print(f"[WARN] Could not save VAE weights: {e}")

        save_data = {
            'graph_data': data,
            'id_counter': self.id_counter,
            'node_queue': list(self.node_queue),
            'access_counts': self.access_counts,
            'last_accessed': self.last_accessed,
            'importance_scores': self.importance_scores,
            'compression_info': compression_info,
            'node_types': self.node_types if self.enable_memory_types else {},
            'temporal_connections': {str(k): v for k, v in self.temporal_connections.items()} if self.enable_temporal_connections else {},
            'forgetting_curves': {str(k): v for k, v in self.forgetting_curves.items()} if self.enable_consolidation else {},
            'memory_hierarchies': {str(k): v for k, v in self.memory_hierarchies.items()} if self.enable_memory_types else {},
            'agi_features': {
                'enable_memory_types': self.enable_memory_types,
                'enable_temporal_connections': self.enable_temporal_connections,
                'enable_consolidation': self.enable_consolidation,
            },
        }

        # Compact separators keep the payload small before compression.
        json_str = json.dumps(save_data, separators=(',', ':'))

        # Write to a temporary file and swap it in, so an interrupted write
        # cannot corrupt the existing archive.
        tmp_file = self.graph_file + '.tmp'
        with gzip.open(tmp_file, 'wt', compresslevel=9, encoding='utf-8') as f:
            f.write(json_str)
        os.replace(tmp_file, self.graph_file)
        tmp_file = None

        # Migration cleanup: drop the old uncompressed file if it is still there.
        old_uncompressed = self.graph_file[:-len('.gz')]
        if old_uncompressed.endswith('.json') and os.path.exists(old_uncompressed):
            try:
                os.remove(old_uncompressed)
                print(f"[CLEANUP] Removed old uncompressed file: {old_uncompressed}")
            except Exception as e:
                print(f"[WARN] Could not remove old uncompressed file {old_uncompressed}: {e}")

        # json.dumps escapes non-ASCII by default, so the character count
        # equals the UTF-8 byte count and no second copy is needed.
        uncompressed_size = len(json_str)
        compressed_size = os.path.getsize(self.graph_file) if os.path.exists(self.graph_file) else uncompressed_size
        compression_ratio = uncompressed_size / max(compressed_size, 1)
        print(f"[SAVE] MemoryGraph saved (compressed only). Size: {uncompressed_size/1024/1024:.2f}MB -> {compressed_size/1024/1024:.2f}MB ({compression_ratio:.1f}x compression)")
    except Exception as e:
        print(f"[FAIL] Save error: {e}")
        # Do not leave a partial temporary archive behind.
        if tmp_file is not None and os.path.exists(tmp_file):
            try:
                os.remove(tmp_file)
            except OSError as cleanup_error:
                print(f"[WARN] Could not remove temporary file {tmp_file}: {cleanup_error}")
def load_graph(self):
    """Restore the graph and its bookkeeping from the gzip-compressed JSON archive.

    The ``.gz`` archive is preferred. If it is missing but an uncompressed
    ``.json`` file with the same stem exists, that file is loaded and
    ``self.graph_file`` is switched to the ``.gz`` name so the next save
    migrates it. If neither exists a warning is printed and nothing changes.

    Restored state: the graph, ``id_counter``, ``node_queue``, per-node
    ``access_counts`` / ``last_accessed`` / ``importance_scores``, the
    memory-type, temporal, forgetting and hierarchy tables (when their feature
    flags are on), the VAE compressor, the PCA models and
    ``quantization_bits``. A saved ``max_nodes`` is never applied; the
    current setting wins.

    Any exception is reported with a ``[FAIL]`` message and the instance is
    reset to an empty graph with empty per-node bookkeeping.
    """

    def _int_keys(mapping):
        # JSON object keys are always strings; turn them back into ints.
        return {int(k) if isinstance(k, str) else k: v for k, v in mapping.items()}

    def _build_vae():
        # Imported lazily so the dependency is only needed when a VAE was saved.
        from neuro_fusion import VAECompressor
        return VAECompressor(
            input_dim=self.original_dim,
            hidden_dim=self.original_dim * 2,
            latent_dim=self.vae_latent_dim,
            shared_model=self.shared_model,
            shared_tokenizer=self.shared_tokenizer,
            enable_training=self.enable_vae_training,
            learning_rate=self.vae_learning_rate,
            device=self.device,
        )

    try:
        graph_file_gz = self.graph_file if self.graph_file.endswith('.gz') else self.graph_file + '.gz'
        if os.path.exists(graph_file_gz):
            with gzip.open(graph_file_gz, 'rt', encoding='utf-8') as f:
                d = json.load(f)
            print(f"[LOAD] Loaded compressed MemoryGraph from {graph_file_gz}")
        else:
            # One-time migration from the old uncompressed file. Strip only
            # the trailing suffix, not every ".gz" in the path.
            old_file = graph_file_gz[:-len('.gz')]
            if old_file.endswith('.json') and os.path.exists(old_file):
                print(f"[MIGRATE] Found old uncompressed file {old_file}, loading for migration...")
                with open(old_file, 'r', encoding='utf-8') as f:
                    d = json.load(f)
                print(f"[LOAD] Loaded MemoryGraph from {old_file} (will be saved as compressed on next save)")
                self.graph_file = graph_file_gz
            else:
                print(f"[WARN] MemoryGraph file not found: {graph_file_gz}")
                return

        self.graph = json_graph.node_link_graph(d['graph_data'], directed=False, edges="links")
        self.id_counter = d.get('id_counter', 0)
        self.node_queue = deque(d.get('node_queue', []))

        # JSON turned the keys of the per-node dicts into strings; map them
        # back to the real node ids so lookups by node id hit again.
        node_by_str = {str(nid): nid for nid in self.graph.nodes()}
        self.access_counts = {node_by_str.get(k, k): v for k, v in (d.get('access_counts') or {}).items()}
        self.last_accessed = {node_by_str.get(k, k): v for k, v in (d.get('last_accessed') or {}).items()}

        # Nodes without a saved score get 0.5 (backward compatibility).
        loaded_scores = d.get('importance_scores') or {}
        self.importance_scores = {
            nid: loaded_scores.get(str(nid), loaded_scores.get(nid, 0.5))
            for nid in self.graph.nodes()
        }

        info = d.get('compression_info') or {}
        self.original_dim = info.get('original_dim')
        comp_dim = info.get('compressed_dim')

        # Feature flags fall back to the current settings for older archives.
        agi_features = d.get('agi_features') or {}
        self.enable_memory_types = agi_features.get('enable_memory_types', self.enable_memory_types)
        self.enable_temporal_connections = agi_features.get('enable_temporal_connections', self.enable_temporal_connections)
        self.enable_consolidation = agi_features.get('enable_consolidation', self.enable_consolidation)
        self.use_combined_compression = info.get('use_combined_compression', self.use_combined_compression)

        self.node_types = _int_keys(d.get('node_types', {})) if self.enable_memory_types else {}
        self.temporal_connections = _int_keys(d.get('temporal_connections', {})) if self.enable_temporal_connections else {}
        self.forgetting_curves = _int_keys(d.get('forgetting_curves', {})) if self.enable_consolidation else {}
        self.memory_hierarchies = _int_keys(d.get('memory_hierarchies', {})) if self.enable_memory_types else {}

        # Load VAE compressor if it was used
        if info.get('use_vae_compression', False):
            self.use_vae_compression = True
            self.vae_latent_dim = info.get('vae_latent_dim', 128)
            self.enable_vae_training = info.get('vae_enable_training', self.enable_vae_training)
            self.vae_learning_rate = info.get('vae_learning_rate', self.vae_learning_rate)

            vae_weights_file = info.get('vae_weights_file')
            if vae_weights_file and os.path.exists(vae_weights_file):
                # Current format: weights live in a separate file.
                try:
                    if self.original_dim:
                        self.vae_compressor = _build_vae()
                        if self.vae_compressor.load_weights(vae_weights_file, strict=False):
                            print(f"[CONFIG] VAE compressor loaded from {vae_weights_file}")
                            self.vae_weights_path = vae_weights_file
                        else:
                            print(f"[WARN] Could not load VAE weights, will re-initialize")
                            self.vae_compressor = None
                except Exception as e:
                    print(f"[WARN] Error loading VAE compressor: {e}. Will re-initialize on first use.")
                    import traceback
                    traceback.print_exc()
                    self.vae_compressor = None
            else:
                # Old format: the state dict was embedded in the JSON as lists.
                vae_state = info.get('vae_state_dict')
                if vae_state and self.original_dim:
                    try:
                        self.vae_compressor = _build_vae()
                        # torch.tensor handles flat, nested and empty lists alike.
                        vae_state_tensors = {
                            k: torch.tensor(v, device=self.device) if isinstance(v, list) else v
                            for k, v in vae_state.items()
                        }
                        self.vae_compressor.load_state_dict(vae_state_tensors, strict=False)
                        if self.enable_vae_training:
                            self.vae_compressor.train()
                        else:
                            self.vae_compressor.eval()
                        print("[CONFIG] VAE compressor loaded from saved state (old format).")
                    except Exception as e:
                        print(f"[WARN] Error loading VAE compressor from old format: {e}. Will re-initialize on first use.")
                        self.vae_compressor = None

        # Load PCA on VAE if it was used (AGI feature)
        if self.use_combined_compression and info.get('pca_on_vae_dim'):
            try:
                pca_dim = info.get('pca_on_vae_dim')
                pca_components = info.get('pca_on_vae_components')
                pca_mean = info.get('pca_on_vae_mean')
                if pca_components and pca_mean:
                    # Fit on placeholder data so sklearn initialises its fitted
                    # attributes. PCA needs at least n_components samples, so
                    # size the placeholder from pca_dim; a fixed 10 rows made
                    # every model with more than 10 components fail to load.
                    placeholder_rows = max(10, pca_dim + 1)
                    dummy_data = np.random.randn(placeholder_rows, len(pca_mean))
                    self.pca_on_vae = PCA(n_components=pca_dim)
                    self.pca_on_vae.fit(dummy_data)
                    # Overwrite the placeholder projection with the saved one.
                    # explained_variance_* keep the placeholder values; they
                    # are not persisted in the archive.
                    self.pca_on_vae.components_ = np.array(pca_components)
                    self.pca_on_vae.mean_ = np.array(pca_mean)
                    self.pca_on_vae.n_components_ = pca_dim
                    print(f"[AGI] PCA on VAE loaded: {pca_dim} components")
            except Exception as e:
                print(f"[WARN] Error loading PCA on VAE: {e}")
                import traceback
                traceback.print_exc()
                self.pca_on_vae = None

        # Load PCA if it was used (and VAE is not)
        if self.original_dim and comp_dim and not self.use_vae_compression:
            self.pca = PCA(n_components=comp_dim)
            if info.get('pca_components') is not None and info.get('pca_mean') is not None:
                try:
                    self.pca.components_ = np.array(info['pca_components'])
                    self.pca.mean_ = np.array(info['pca_mean'])
                    print("[CONFIG] PCA components and mean loaded.")
                except ValueError as ve:
                    print(f"[WARN] Error loading PCA components/mean (shape mismatch?): {ve}. PCA might need re-fitting.")
                    self.pca = None
            else:
                print("[WARN] PCA components or mean not found in saved data. PCA might need re-fitting.")
                self.pca = None

        self.quantization_bits = info.get('quantization_bits', self.quantization_bits)

        # The saved max_nodes is never applied: the current setting wins, and
        # None (unbounded growth) in particular is preserved.
        saved_max_nodes = info.get('max_nodes')
        if self.max_nodes is None and saved_max_nodes is not None:
            print(f"[CONFIG] Preserving infinite growth mode (ignoring saved max_nodes={saved_max_nodes})")
        print("[NETWORK] MemoryGraph loaded.")

        # Backwards compatibility: bring every embedding into the
        # {'data': [...], 'compressed': bool} format.
        for nid, node_data in self.graph.nodes(data=True):
            current_embedding = node_data.get('embedding')
            if isinstance(current_embedding, list):  # Old format: list of floats
                self.graph.nodes[nid]['embedding'] = {'data': current_embedding, 'compressed': False}
            elif isinstance(current_embedding, dict) and 'data' not in current_embedding:
                if current_embedding:
                    # Old dict format: take the first value as the data.
                    first_value = next(iter(current_embedding.values()))
                    self.graph.nodes[nid]['embedding'] = {'data': first_value, 'compressed': False}
                else:
                    self.graph.nodes[nid]['embedding'] = {'data': [], 'compressed': False}
            elif not isinstance(current_embedding, dict):  # e.g. None
                print(f"Warning: Node {nid} embedding format unexpected during load. Setting to empty dict.")
                self.graph.nodes[nid]['embedding'] = {'data': [], 'compressed': False}

            # A missing or non-dict metadata value (e.g. JSON null) becomes an
            # empty dict instead of aborting the whole load.
            raw_metadata = node_data.get('metadata')
            metadata_to_clean = raw_metadata if isinstance(raw_metadata, dict) else {}
            cleaned_metadata = {}
            for k, v in metadata_to_clean.items():
                if isinstance(v, np.ndarray):
                    cleaned_metadata[k] = v.tolist()
                elif isinstance(v, np.generic):
                    cleaned_metadata[k] = v.item()
                else:
                    cleaned_metadata[k] = v
            self.graph.nodes[nid]['metadata'] = cleaned_metadata
    except Exception as e:
        print(f"[FAIL] Load error: {e}")
        # Start from an empty graph, and clear the per-node dictionaries too
        # so they cannot describe nodes that no longer exist.
        self.graph = nx.Graph()
        self.id_counter = 0
        self.node_queue = deque()
        self.access_counts = {}
        self.last_accessed = {}
        self.importance_scores = {}
        self.pca = None
        self.original_dim = None
def get_memory_summary(self):
    """Return node/edge counts and a rough estimate of embedding storage.

    For each node, a dict embedding with a ``'data'`` entry is counted as
    ``len(data)`` values of ``bits`` bits each (default 16, capped at 4 bytes
    per value); a bare list embedding is counted as 4 bytes per value. Any
    other embedding only triggers a warning.

    Returns:
        A dict with ``'node_count'``, ``'edge_count'`` and
        ``'estimated_memory_mb'`` (rounded to two decimals), or
        ``{'error': message}`` if anything raises.
    """
    try:
        total_bytes = 0.0
        for _, attrs in self.graph.nodes(data=True):
            emb_data = attrs.get('embedding')
            if isinstance(emb_data, dict) and 'data' in emb_data:
                bits = emb_data.get('bits', 16)
                if bits is None:
                    bits = 16
                # Fractional bytes per value: integer division reported
                # sub-byte quantisation (e.g. 4 bits) as zero bytes.
                bytes_per_value = bits / 8 if bits < 32 else 4
                total_bytes += len(emb_data['data']) * bytes_per_value
            elif isinstance(emb_data, list):  # Old list format, assume float32
                total_bytes += len(emb_data) * 4
            else:
                print(f"Warning: Unexpected embedding format encountered for size calculation: {type(emb_data)}")
        return {
            'node_count': self.graph.number_of_nodes(),
            'edge_count': self.graph.number_of_edges(),
            'estimated_memory_mb': round(total_bytes / (1024**2), 2)
        }
    except Exception as e:
        return {'error': str(e)}
def analyze_memory(self):
    """Alias for ``get_memory_summary``; returns its result unchanged."""
    summary = self.get_memory_summary()
    return summary