# tokenizer_agent.py
# (web-viewer scrape artifacts removed from the top of this file)
import os
import ast
import zlib
import json
import logging
from logos.logos_core import get_gpf
from logos.manifold_state import ManifoldState
# Module-level logger, explicitly named after the agent (not __name__).
logger = logging.getLogger("TokenizerAgent")
class TokenizerAgent:
    """
    Protocol 6: Tokenizer Agent

    Walks a repository (the Source) and parses every supported file into a
    "Prime Token" dict, then registers those tokens as nodes on the shared
    ManifoldState graph (Domain Potentiality Space).
    """

    def __init__(self, root_dir):
        """
        Args:
            root_dir: Root directory of the repository to tokenize.
        """
        self.root_dir = root_dir
        self.manifold = ManifoldState()  # Connects to logos/manifold.json

    def scan_and_tokenize(self):
        """Scan root_dir recursively and tokenize all supported files.

        Supported extensions are .py and .md; hidden and dunder-prefixed
        directories are skipped entirely.

        Returns:
            list[dict]: one Prime Token per successfully parsed file.
        """
        tokens = []
        for root, dirs, files in os.walk(self.root_dir):
            # Prune hidden/system dirs in place so os.walk never descends
            # into them (e.g. .git, __pycache__).
            dirs[:] = [d for d in dirs if not d.startswith(('.', '__'))]
            for file in files:
                if file.endswith((".py", ".md")):
                    token = self._tokenize_file(os.path.join(root, file))
                    if token:
                        tokens.append(token)
        self._register_tokens_to_manifold(tokens)
        return tokens

    def _tokenize_file(self, filepath):
        """Parse a single file into a Prime Token dict.

        Returns:
            dict | None: the token, or None if the file could not be read.
        """
        try:
            with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()
            rel_path = os.path.relpath(filepath, self.root_dir)

            # --- Prime Topology Analysis ---
            # 1. Hash content -> integer field. NOTE: adler32 is fast but
            #    weak; distinct files can collide and will be collapsed
            #    into a single node by the id-dedup downstream.
            h = zlib.adler32(content.encode('utf-8'))
            # 2. Resonance (GPF) — semantics defined by logos.logos_core.
            gpf = get_gpf(h)
            # 3. Map resonance magnitude onto a domain shell.
            if gpf < 200:
                domain = "INNER_SHELL"
            elif gpf < 2000:
                domain = "PRIME_CHANNEL"
            else:
                domain = "OUTER_SHELL"

            return {
                "id": h,
                "name": os.path.basename(filepath),
                "path": rel_path,
                "type": "file",
                "geometry": {
                    "hash": h,
                    "gpf": gpf,
                    "domain": domain,
                },
                "content_preview": content[:100],
            }
        except Exception as e:
            # Best-effort: one unreadable file must not abort the scan.
            logger.error("Failed to tokenize %s: %s", filepath, e)
            return None

    def _register_tokens_to_manifold(self, tokens):
        """Merge *tokens* into the persistent Manifold graph and save it.

        Nodes are deduplicated by "id" against both the existing graph and
        the incoming batch itself.
        """
        # Ensure the manifold state has a graph structure to append to.
        if "graph" not in self.manifold.state:
            self.manifold.state["graph"] = {"nodes": [], "edges": []}
        nodes = self.manifold.state["graph"]["nodes"]
        seen_ids = {n["id"] for n in nodes}
        added = 0
        for t in tokens:
            if t["id"] not in seen_ids:
                nodes.append(t)
                # Bug fix: record within-batch ids too, so two tokens with
                # the same id in one scan aren't both appended.
                seen_ids.add(t["id"])
                added += 1
        self.manifold.save()
        logger.info("Registered %d new tokens to Manifold (%d scanned).",
                    added, len(tokens))
if __name__ == "__main__":
    # Smoke test: tokenize the current working directory.
    tokenizer = TokenizerAgent(".")
    print("Tokenizing current directory...")
    tokenizer.scan_and_tokenize()
    print("Done.")