# GitHub Copilot
# Protocol 22: Update HF Inference to Router endpoint
# commit: edae06c
import os
import ast
import zlib
import json
import logging
from logos.logos_core import get_gpf
from logos.manifold_state import ManifoldState
logger = logging.getLogger("TokenizerAgent")


class TokenizerAgent:
    """
    Protocol 6: Tokenizer Agent

    Parses a repository (Source) into Prime Tokens (Domain Potentiality Space).
    Each supported file (.py / .md) becomes one token dict carrying a content
    hash, its GPF resonance value, and the derived domain classification.
    """

    def __init__(self, root_dir):
        # Root of the repository to scan; stored as given (may be relative).
        self.root_dir = root_dir
        self.manifold = ManifoldState()  # Connects to logos/manifold.json

    def scan_and_tokenize(self):
        """Scan root_dir recursively and tokenize all supported files.

        Returns:
            list[dict]: tokens produced this run. Files that fail to
            tokenize are logged and skipped, not included.
        """
        tokens = []
        for root, dirs, files in os.walk(self.root_dir):
            # Prune hidden/system dirs in place so os.walk won't descend into them.
            dirs[:] = [d for d in dirs if not d.startswith('.') and not d.startswith('__')]
            for file in files:
                if file.endswith((".py", ".md")):
                    token = self._tokenize_file(os.path.join(root, file))
                    if token:
                        tokens.append(token)
        self._register_tokens_to_manifold(tokens)
        return tokens

    def _tokenize_file(self, filepath):
        """Parse a single file into a Prime Token dict; return None on failure."""
        try:
            with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()
            rel_path = os.path.relpath(filepath, self.root_dir)
            # --- Prime Topology Analysis ---
            # 1. Hash content -> integer field.
            #    NOTE(review): adler32 is a weak 32-bit checksum, so distinct
            #    files can collide and merge into one manifold node; acceptable
            #    only if ids are heuristic — confirm with the manifold's needs.
            h = zlib.adler32(content.encode('utf-8'))
            # 2. Calculate resonance (GPF).
            gpf = get_gpf(h)
            # 3. Map resonance onto a domain band.
            if gpf < 200:
                domain = "INNER_SHELL"
            elif gpf < 2000:
                domain = "PRIME_CHANNEL"
            else:
                domain = "OUTER_SHELL"
            return {
                "id": h,
                "name": os.path.basename(filepath),
                "path": rel_path,
                "type": "file",
                "geometry": {
                    "hash": h,
                    "gpf": gpf,
                    "domain": domain
                },
                "content_preview": content[:100]
            }
        except Exception as e:
            # Best-effort: one unreadable file must not abort the whole scan.
            logger.error(f"Failed to tokenize {filepath}: {e}")
            return None

    def _register_tokens_to_manifold(self, tokens):
        """Update the physical Manifold State with new tokens.

        Nodes are deduplicated by id against both the nodes already in the
        manifold AND earlier tokens in the same batch. (Fix: the original
        snapshot of existing ids was never updated, so duplicate-content
        files within one scan were appended twice.)
        """
        # Ensure the manifold state has a graph structure.
        graph = self.manifold.state.setdefault("graph", {"nodes": [], "edges": []})
        existing_ids = {n["id"] for n in graph["nodes"]}
        added = 0
        for t in tokens:
            if t["id"] not in existing_ids:
                graph["nodes"].append(t)
                existing_ids.add(t["id"])  # dedupe within this batch too
                added += 1
        self.manifold.save()
        # Report the number actually inserted, not the batch size.
        logger.info(f"Registered {added} new tokens to Manifold.")
if __name__ == "__main__":
    # Manual smoke test: tokenize the current working directory.
    demo_agent = TokenizerAgent(".")
    print("Tokenizing current directory...")
    demo_agent.scan_and_tokenize()
    print("Done.")