import os
import ast
import zlib
import json
import logging

from logos.logos_core import get_gpf
from logos.manifold_state import ManifoldState

logger = logging.getLogger("TokenizerAgent")


class TokenizerAgent:
    """
    Protocol 6: Tokenizer Agent
    Parses a repository (Source) into Prime Tokens (Domain Potentiality Space).

    Walks ``root_dir``, turns every supported file into a token dict and
    records those tokens as graph nodes on a ``ManifoldState``.
    """

    # File-name suffixes that scan_and_tokenize() picks up.
    SUPPORTED_EXTENSIONS = (".py", ".md")

    def __init__(self, root_dir):
        """Remember the directory to scan and create the manifold state.

        Args:
            root_dir: Directory that scan_and_tokenize() walks; token paths
                are stored relative to it.
        """
        self.root_dir = root_dir
        self.manifold = ManifoldState()  # Connects to logos/manifold.json

    def scan_and_tokenize(self):
        """Scans root_dir and tokenizes all supported files.

        Every file whose name ends with one of SUPPORTED_EXTENSIONS is
        passed to _tokenize_file(); files that fail to tokenize are skipped.
        The collected tokens are then registered on the manifold.

        Returns:
            list of token dicts, one per successfully tokenized file.
        """
        tokens = []
        for root, dirs, files in os.walk(self.root_dir):
            # Skip hidden/system dirs. Assigning to dirs[:] prunes the list
            # in place, so os.walk does not descend into the removed entries.
            dirs[:] = [d for d in dirs if not d.startswith((".", "__"))]
            for file in files:
                if not file.endswith(self.SUPPORTED_EXTENSIONS):
                    continue
                token = self._tokenize_file(os.path.join(root, file))
                if token:
                    tokens.append(token)

        self._register_tokens_to_manifold(tokens)
        return tokens

    def _tokenize_file(self, filepath):
        """Parses a single file into a Prime Token.

        Args:
            filepath: Path of the file to read.

        Returns:
            A token dict (id, name, path, type, geometry, content_preview),
            or None if anything raised while reading or analysing the file
            (the failure is logged, not propagated).
        """
        try:
            # errors='ignore' drops undecodable bytes instead of raising, so
            # the checksum below is computed on the decoded text only.
            with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()

            rel_path = os.path.relpath(filepath, self.root_dir)

            # --- Prime Topology Analysis ---
            # 1. Hash Content -> Integer Field (Adler-32 of the UTF-8 text)
            h = zlib.adler32(content.encode('utf-8'))

            # 2. Calculate Resonance (GPF)
            # NOTE(review): get_gpf comes from logos.logos_core; its exact
            # semantics are not visible from this file.
            gpf = get_gpf(h)

            # 3. Determine Domain
            if gpf < 200:
                domain = "INNER_SHELL"
            elif gpf < 2000:
                domain = "PRIME_CHANNEL"
            else:
                domain = "OUTER_SHELL"

            token = {
                "id": h,
                "name": os.path.basename(filepath),
                "path": rel_path,
                "type": "file",
                "geometry": {
                    "hash": h,
                    "gpf": gpf,
                    "domain": domain
                },
                "content_preview": content[:100]
            }
            return token
        except Exception as e:
            # Best-effort: one unreadable file must not abort the whole scan.
            logger.error("Failed to tokenize %s: %s", filepath, e)
            return None

    def _register_tokens_to_manifold(self, tokens):
        """Updates the physical Manifold State with new tokens.

        Appends each token whose "id" is not yet present among the graph
        nodes, then saves the manifold. Ids are tracked while appending, so
        duplicates inside ``tokens`` itself are also added only once.

        Args:
            tokens: Iterable of token dicts, each carrying an "id" key.
        """
        # Ensure manifold state has a graph structure
        if "graph" not in self.manifold.state:
            self.manifold.state["graph"] = {"nodes": [], "edges": []}

        # Add new nodes (deduplicated by ID)
        nodes = self.manifold.state["graph"]["nodes"]
        existing_ids = {n["id"] for n in nodes}
        for t in tokens:
            if t["id"] in existing_ids:
                continue
            nodes.append(t)
            # Record the id immediately so a later token in this same batch
            # with an identical id is not appended a second time.
            existing_ids.add(t["id"])

        self.manifold.save()
        # Reports the number of tokens handed in, not the number of new nodes.
        logger.info("Registered %d tokens to Manifold.", len(tokens))


if __name__ == "__main__":
    # Test Run
    agent = TokenizerAgent(".")
    print("Tokenizing current directory...")
    agent.scan_and_tokenize()
    print("Done.")