# Protogen / app.py — Hugging Face Space entry point
# Author: KingOfThoughtFleuren (commit e810833, "Update app.py", verified)
import time
import hashlib
import json
import re
from pathlib import Path
from collections import defaultdict
import math
import threading
import queue
import gradio as gr
import networkx as nx
import numpy as np
from scipy import sparse
from scipy.sparse import linalg
import sympy as sp
# --- Dependency Check & Hardware Imports ---
try:
from pypdf import PdfReader
from docx import Document
except ImportError as e:
print(f"[WARNING]: Missing dependencies for file parsing: {e}")
# --- Hardware Acceleration Subsystem ---
try:
import torch
HAS_TORCH = True
except ImportError:
HAS_TORCH = False
class HardwareAccelerator:
def __init__(self):
self.device_name = "CPU (SciPy Optimized)"
self.device = None
self.enabled = False
# Only try to load CUDA if Torch is actually present AND CUDA is available
if HAS_TORCH and torch.cuda.is_available():
try:
self.device = torch.device("cuda")
self.device_name = f"NVIDIA GPU (CUDA) - {torch.cuda.get_device_name(0)}"
self.enabled = True
except:
self.enabled = False
def compute_eigenvector_centrality(self, logic_map, tol=1e-06, max_iter=1000):
"""
Calculates centrality. Defaults to SciPy (CPU fast path) if GPU is unavailable.
"""
if not logic_map: return {}
nodes = list(logic_map.keys())
node_to_idx = {node: i for i, node in enumerate(nodes)}
n = len(nodes)
# Build Data for Matrix
row, col, data = [], [], []
for u, neighbors in logic_map.items():
u_idx = node_to_idx[u]
for v, weight in neighbors.items():
if v in node_to_idx:
v_idx = node_to_idx[v]
row.append(u_idx)
col.append(v_idx)
data.append(float(weight))
if not data: return {node: 0.0 for node in nodes}
# --- PATH A: GPU ACCELERATION (Only if Torch + CUDA active) ---
if self.enabled:
try:
i = torch.LongTensor([row, col]).to(self.device)
v = torch.FloatTensor(data).to(self.device)
adj_matrix = torch.sparse_coo_tensor(i, v, (n, n)).to(self.device)
x = torch.ones((n, 1), device=self.device) / n
for _ in range(max_iter):
x_prev = x.clone()
x = torch.sparse.mm(adj_matrix, x)
norm = torch.norm(x)
if norm == 0: break
x = x / norm
if torch.norm(x - x_prev) < tol:
break
scores = x.flatten().cpu().numpy().tolist()
return {nodes[i]: float(scores[i]) for i in range(n)}
except Exception as e:
print(f"GPU Math Error: {e}. Switching to SciPy.")
self.enabled = False
# Fall through to Path B
# --- PATH B: SCIPY SPARSE (Fast CPU) ---
# This is the default path for Spaces without GPU
try:
adj_sparse = sparse.csr_matrix((data, (row, col)), shape=(n, n))
# Use eigs for large matrices, or simple power iteration if very small
if n > 5:
eigenvalues, eigenvectors = linalg.eigs(adj_sparse, k=1, which='LR', tol=tol, maxiter=max_iter)
scores = np.abs(eigenvectors.flatten())
else:
# Simple fallback for tiny graphs where ARPACK might complain
return nx.eigenvector_centrality(nx.Graph(list(zip(nodes, nodes))), max_iter=max_iter) # Dummy fallback
norm = np.sum(scores)
if norm > 0: scores = scores / norm
return {nodes[i]: float(scores[i]) for i in range(n)}
except:
# Final Safety Net: NetworkX
G = nx.Graph()
for i in range(len(data)):
G.add_edge(nodes[row[i]], nodes[col[i]], weight=data[i])
try:
return nx.eigenvector_centrality(G, max_iter=max_iter, tol=tol)
except:
return nx.degree_centrality(G)
# --- Memory Subsystem ---
class ProtogenMemory:
    """JSON-file persistence layer for core state and the ontology graph.

    Creates two files under `protogen_root_path`:
    - memory_core.json  : identity + thresholds ("core state")
    - ontology_sqt.json : logic map, symbols, patterns, graph metrics
    """

    def __init__(self, protogen_root_path: Path):
        self.protogen_root_path = protogen_root_path
        self.protogen_root_path.mkdir(parents=True, exist_ok=True)
        self.paths = {
            "memory": self.protogen_root_path / "memory_core.json",
            "ontology": self.protogen_root_path / "ontology_sqt.json",
        }
        self._initialize_storage()
        # In-memory caches mirroring the on-disk files.
        self.core_state = self._load_json(self.paths["memory"])
        self.ontology_data = self._load_json(self.paths["ontology"])

    def _initialize_storage(self):
        """Write default JSON documents for any storage file that is missing."""
        defaults = {
            "memory": {},
            "ontology": {
                "logic_map": {}, "symbols": {}, "reasoning_patterns": [],
                "graph_metrics": {"eigenvector_centrality": {}, "shannon_entropy": 0.0},
            }
        }
        for key, path in self.paths.items():
            if not path.exists():
                with open(path, 'w', encoding='utf-8') as f:
                    json.dump(defaults[key], f)

    def _load_json(self, path):
        """Best-effort load: return {} when the file is unreadable or corrupt.

        BUGFIX: narrowed from a bare `except:` that swallowed every exception
        (including KeyboardInterrupt/SystemExit).
        """
        try:
            with open(path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError (its subclass).
            return {}

    def _save_json(self, data, path):
        """Serialize `data` to `path` as pretty-printed JSON."""
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4)

    def load_core_state(self):
        """Return the cached core state dict."""
        return self.core_state

    def save_core_state(self, state):
        """Merge `state` into the cached core state and persist it."""
        self.core_state.update(state)
        self._save_json(self.core_state, self.paths["memory"])

    def load_ontology(self):
        """Return the cached ontology dict."""
        return self.ontology_data

    def save_ontology(self, ontology):
        """Replace the cached ontology and persist it."""
        self.ontology_data = ontology
        self._save_json(self.ontology_data, self.paths["ontology"])
# --- Main Operative Class ---
class OperativeProtogen:
    """Core operative: ingests text into a token co-occurrence graph and keeps
    graph metrics (eigenvector centrality, Shannon entropy) fresh through a
    background sync thread.

    `self.lock` guards `logic_map`, `graph_metrics`, and `symbolic_anchors`,
    which are shared between the Gradio UI callbacks and the sync thread.
    """

    def __init__(self, root_dir="protogen_core"):
        self.root = Path(root_dir)
        self.library_path = self.root / "library"
        self.library_path.mkdir(parents=True, exist_ok=True)
        self.accelerator = HardwareAccelerator()
        self.memory_manager = ProtogenMemory(self.root)
        self.core_state = self.memory_manager.load_core_state()
        if not self.core_state:
            # First boot: persist an identity hash and default thresholds.
            self._initial_genesis()
            self.core_state = self.memory_manager.load_core_state()
        self.identity_hash = self.core_state.get("identity", {}).get("hash", "UNKNOWN")
        self.thresholds = self.core_state.get("thresholds", {
            "min_token_len": 3, "reflection_trigger": 2, "shannon_entropy_threshold": 16.0,
            "eigenvector_threshold": 0.001, "axiom_alignment_threshold": 0.5
        })
        self.ontology = self.memory_manager.load_ontology()
        self.logic_map = self.ontology.get("logic_map", {})
        self.symbols = self.ontology.get("symbols", {})
        self.reasoning_patterns = self.ontology.get("reasoning_patterns", [])
        self.graph_metrics = self.ontology.get("graph_metrics", {"eigenvector_centrality": {}, "shannon_entropy": 0.0})
        self.symbolic_anchors = {}
        self.lock = threading.Lock()
        self.is_syncing = threading.Event()
        self.sync_thread = threading.Thread(target=self._autonomic_sync_loop, daemon=True)
        self.sync_thread.start()

    def _initial_genesis(self):
        """Create and persist the first-run core state (identity + thresholds)."""
        genesis_state = {
            "identity": {"hash": hashlib.sha256(str(time.time_ns()).encode()).hexdigest()},
            "thresholds": {
                "min_token_len": 3, "reflection_trigger": 2, "shannon_entropy_threshold": 16.0,
                "eigenvector_threshold": 0.001, "axiom_alignment_threshold": 0.5
            }
        }
        self.memory_manager.save_core_state(genesis_state)

    def _save_memory(self):
        """Flush the in-memory ontology back to disk (thread-safe)."""
        with self.lock:
            self.ontology.update({
                "logic_map": self.logic_map, "symbols": self.symbols,
                "reasoning_patterns": self.reasoning_patterns,
                "graph_metrics": self.graph_metrics
            })
            self.memory_manager.save_ontology(self.ontology)

    def _calculate_shannon_entropy(self, text=None) -> float:
        """Shannon entropy (bits) of `text`'s word distribution, or — when
        `text` is None/empty — of the logic map's outgoing edge weights."""
        if text:
            clean_text = re.sub(r'[^\w\s]', '', text.lower())
            words = clean_text.split()
            if not words:
                return 0.0
            word_counts = defaultdict(int)
            for w in words:
                word_counts[w] += 1
            total = len(words)
        else:
            with self.lock:
                if not self.logic_map:
                    return 0.0
                word_counts = defaultdict(int)
                for w, n in self.logic_map.items():
                    word_counts[w] += sum(n.values())
            total = sum(word_counts.values())
        if total == 0:
            return 0.0
        entropy = 0.0
        for count in word_counts.values():
            p = count / total
            entropy -= p * math.log2(p)
        return entropy

    def _autonomic_sync_loop(self):
        """Daemon loop: re-sync graph metrics every 30 seconds."""
        while True:
            self.is_syncing.set()
            self.sync()
            self.is_syncing.clear()
            time.sleep(30)

    def sync(self):
        """Recompute graph metrics and promote high-centrality nodes to sympy symbols."""
        # 1. Update Graph Metrics.
        # BUGFIX: snapshot under the lock so centrality runs on a consistent
        # copy while the ingestion paths keep mutating self.logic_map.
        with self.lock:
            snapshot = {u: dict(nbrs) for u, nbrs in self.logic_map.items()}
        centrality = self.accelerator.compute_eigenvector_centrality(snapshot)
        # Entropy takes the (non-reentrant) lock internally, so compute it
        # before re-acquiring the lock below.
        entropy = self._calculate_shannon_entropy()
        threshold = self.thresholds.get("axiom_alignment_threshold", 0.5)
        with self.lock:
            self.graph_metrics["eigenvector_centrality"] = centrality
            self.graph_metrics["shannon_entropy"] = entropy
            # 2. SYMPY Integration — anchors are read under the lock in chat(),
            # so mutate them under the lock too.
            for node, score in centrality.items():
                if score > threshold and node not in self.symbolic_anchors:
                    # Create valid sympy symbol name from the node string.
                    clean_sym = re.sub(r'[^a-zA-Z0-9]', '_', node)
                    if clean_sym:
                        self.symbolic_anchors[node] = sp.Symbol(clean_sym)
        self._save_memory()

    def process_file_live(self, file_obj):
        """Ingest an uploaded .txt/.pdf/.docx file; returns a status string."""
        # Gradio passes a NamedString or similar object, use .name for path.
        try:
            fp = Path(file_obj.name)
            content = ""
            if fp.suffix == ".txt":
                content = fp.read_text(encoding='utf-8', errors='ignore')
            elif fp.suffix == ".pdf":
                r = PdfReader(fp)
                for p in r.pages:
                    content += p.extract_text() + " "
            elif fp.suffix == ".docx":
                d = Document(fp)
                for p in d.paragraphs:
                    content += p.text + " "
            if content:
                # Reject noise-like input before it pollutes the graph.
                input_entropy = self._calculate_shannon_entropy(content)
                if input_entropy > self.thresholds.get("shannon_entropy_threshold", 16.0):
                    return f"Error: High Entropy ({input_entropy:.2f})."
                self._process_text_content(content)
                return f"Success: Processed {fp.name}."
            return "Error: Empty file."
        except Exception as e:
            return f"Error: {str(e)}"

    def _process_text_content(self, content):
        """Add bigram co-occurrence edges for tokens longer than min_token_len."""
        clean_content = re.sub(r'[^\w\s]', '', content.lower())
        words = [t for t in clean_content.split() if len(t) > self.thresholds.get("min_token_len", 3)]
        if not words:
            return
        with self.lock:
            for i in range(len(words) - 1):
                w1, w2 = words[i], words[i + 1]
                if w1 not in self.logic_map:
                    self.logic_map[w1] = {}
                self.logic_map[w1][w2] = self.logic_map[w1].get(w2, 0) + 1

    def chat(self, user_in):
        """Integrate a chat message and reply with anchor/logic-link telemetry."""
        ent = self._calculate_shannon_entropy(user_in)
        if ent > self.thresholds.get("shannon_entropy_threshold", 16.0):
            return f"[SYSTEM]: Entropy rejection ({ent:.2f})."
        self._process_text_content(user_in)
        resp = f"[{self.accelerator.device_name}]: "
        with self.lock:
            clean_in = re.sub(r'[^\w\s]', '', user_in.lower()).split()
            matches = [w for w in clean_in if w in self.symbolic_anchors]
            if matches:
                resp += f"Symbolic Anchor: '{matches[0]}'. "
            words = [w for w in clean_in if w in self.logic_map]
            if words:
                associations = self.logic_map[words[0]]
                if associations:
                    best = max(associations.items(), key=lambda x: x[1])[0]
                    resp += f"Logic link: '{words[0]}' -> '{best}'. "
        return resp + "Input integrated."
# --- Gradio Interface ---
# Single module-level operative instance shared by all UI callbacks below.
protogen = OperativeProtogen()
def protogen_chat(message, history):
    # ChatInterface callback; `history` is ignored — conversational state
    # lives inside the `protogen` singleton, not in the UI.
    return protogen.chat(message)
def handle_file_upload(files):
    """Run every uploaded file through live ingestion, one status line each.

    Returns an empty string when no files were provided.
    """
    if not files:
        return ""
    return "\n".join(protogen.process_file_live(item) for item in files)
def get_stats():
    """Safely serialize live system metrics for the telemetry panel.

    Returns a plain dict (JSON-serializable) so gr.JSON can render it.
    """
    try:
        ent = protogen.graph_metrics.get('shannon_entropy', 0.0)
        return {
            "Identity": protogen.identity_hash[:8],
            "Nodes": len(protogen.logic_map),
            "Symbolic Anchors": len(protogen.symbolic_anchors),
            "Entropy": f"{ent:.2f}",
            "Math Engine": protogen.accelerator.device_name
        }
    except Exception:
        # BUGFIX: narrowed from a bare `except:`. Metrics may legitimately be
        # mid-initialization on the first render — report that instead.
        return {"Status": "Initializing..."}
# UI layout: chat surface on the left, ingestion + telemetry sidebar on the right.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# PROTOGEN V4.0.9 - Scientific Operative Interface")
    with gr.Row():
        with gr.Column(scale=3):
            # Main conversational surface wired to OperativeProtogen.chat.
            gr.ChatInterface(fn=protogen_chat, title="Architect Link")
        with gr.Column(scale=1):
            gr.Markdown("### Live Ingestion")
            file_output = gr.Textbox(label="Status", interactive=False)
            upload_btn = gr.File(label="Upload", file_count="multiple")
            upload_btn.upload(fn=handle_file_upload, inputs=upload_btn, outputs=file_output)
            gr.Markdown("### Math Telemetry")
            # gr.JSON accepts a callable as `value`; it is evaluated on render.
            stats_display = gr.JSON(value=get_stats, label="System Metrics")
            refresh_btn = gr.Button("Refresh Telemetry")
            refresh_btn.click(fn=get_stats, outputs=stats_display)
if __name__ == "__main__":
    demo.launch()