# Spaces:
# Runtime error
# Runtime error
import os
import ast
import zlib
import json
import logging

from logos.logos_core import get_gpf
from logos.manifold_state import ManifoldState

# Module-level logger; note the hard-coded name rather than __name__.
logger = logging.getLogger("TokenizerAgent")
class TokenizerAgent:
    """
    Protocol 6: Tokenizer Agent

    Parses a repository (Source) into Prime Tokens (Domain Potentiality Space).

    Walks a directory tree, turns each supported file (.py / .md) into a
    "Prime Token" dict keyed by an Adler-32 checksum of its contents, and
    registers the tokens as nodes in the shared manifold graph.
    """

    def __init__(self, root_dir):
        """
        Args:
            root_dir: Directory to scan; token paths are stored relative to it.
        """
        self.root_dir = root_dir
        self.manifold = ManifoldState()  # Connects to logos/manifold.json

    def scan_and_tokenize(self):
        """Scan root_dir recursively and tokenize all supported files.

        Returns:
            list[dict]: One token per successfully parsed .py/.md file
            (files that fail to tokenize are skipped, not aborted on).
        """
        tokens = []
        for root, dirs, files in os.walk(self.root_dir):
            # Prune hidden/system dirs in place so os.walk never descends
            # into them (mutating dirs[:] is the documented os.walk idiom).
            dirs[:] = [d for d in dirs
                       if not d.startswith('.') and not d.startswith('__')]
            for file in files:
                if file.endswith((".py", ".md")):
                    token = self._tokenize_file(os.path.join(root, file))
                    if token:
                        tokens.append(token)
        self._register_tokens_to_manifold(tokens)
        return tokens

    def _tokenize_file(self, filepath):
        """Parse a single file into a Prime Token dict, or None on failure.

        The token id is the Adler-32 checksum of the file contents, so two
        byte-identical files collapse to the same id.  NOTE(review):
        Adler-32 is a weak 32-bit checksum; distinct files can collide.
        """
        try:
            with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()
            rel_path = os.path.relpath(filepath, self.root_dir)

            # --- Prime Topology Analysis ---
            # 1. Hash Content -> Integer Field
            h = zlib.adler32(content.encode('utf-8'))
            # 2. Calculate Resonance (GPF) via project helper
            #    (semantics defined in logos.logos_core — presumably a
            #    prime-factor metric; confirm against that module).
            gpf = get_gpf(h)
            # 3. Determine Domain from the GPF magnitude.
            if gpf < 200:
                domain = "INNER_SHELL"
            elif gpf < 2000:
                domain = "PRIME_CHANNEL"
            else:
                domain = "OUTER_SHELL"

            return {
                "id": h,
                "name": os.path.basename(filepath),
                "path": rel_path,
                "type": "file",
                "geometry": {
                    "hash": h,
                    "gpf": gpf,
                    "domain": domain
                },
                "content_preview": content[:100]
            }
        except Exception as e:
            # Best-effort: a single unreadable file must not abort the scan.
            logger.error(f"Failed to tokenize {filepath}: {e}")
            return None

    def _register_tokens_to_manifold(self, tokens):
        """Update the physical Manifold State with new tokens.

        Nodes are deduplicated by token id against the existing graph AND
        within this batch.

        Args:
            tokens: List of token dicts produced by _tokenize_file.
        """
        # Ensure manifold state has a graph structure.
        if "graph" not in self.manifold.state:
            self.manifold.state["graph"] = {"nodes": [], "edges": []}

        existing_ids = {n["id"] for n in self.manifold.state["graph"]["nodes"]}
        registered = 0
        for t in tokens:
            if t["id"] not in existing_ids:
                self.manifold.state["graph"]["nodes"].append(t)
                # BUGFIX: record ids added this batch, so identical files
                # scanned together are not registered as duplicate nodes.
                # (The original only checked pre-existing ids.)
                existing_ids.add(t["id"])
                registered += 1

        self.manifold.save()
        # Report what was actually registered, not the raw batch size.
        logger.info(f"Registered {registered} of {len(tokens)} tokens to Manifold.")
if __name__ == "__main__":
    # Manual smoke test: tokenize the working directory into the manifold.
    tokenizer = TokenizerAgent(".")
    print("Tokenizing current directory...")
    tokenizer.scan_and_tokenize()
    print("Done.")