# Protogen / app.py — Hugging Face Space entry point
# Author: KingOfThoughtFleuren (commit e810833, "Update app.py", verified)
import time
import hashlib
import json
import re
from pathlib import Path
from collections import defaultdict
import math
import threading
import queue
import gradio as gr
import networkx as nx
import numpy as np
from scipy import sparse
from scipy.sparse import linalg
import sympy as sp
# --- Dependency Check & Hardware Imports ---
try:
from pypdf import PdfReader
from docx import Document
except ImportError as e:
print(f"[WARNING]: Missing dependencies for file parsing: {e}")
# --- Hardware Acceleration Subsystem ---
try:
import torch
HAS_TORCH = True
except ImportError:
HAS_TORCH = False
class HardwareAccelerator:
def __init__(self):
self.device_name = "CPU (SciPy Optimized)"
self.device = None
self.enabled = False
# Only try to load CUDA if Torch is actually present AND CUDA is available
if HAS_TORCH and torch.cuda.is_available():
try:
self.device = torch.device("cuda")
self.device_name = f"NVIDIA GPU (CUDA) - {torch.cuda.get_device_name(0)}"
self.enabled = True
except:
self.enabled = False
def compute_eigenvector_centrality(self, logic_map, tol=1e-06, max_iter=1000):
"""
Calculates centrality. Defaults to SciPy (CPU fast path) if GPU is unavailable.
"""
if not logic_map: return {}
nodes = list(logic_map.keys())
node_to_idx = {node: i for i, node in enumerate(nodes)}
n = len(nodes)
# Build Data for Matrix
row, col, data = [], [], []
for u, neighbors in logic_map.items():
u_idx = node_to_idx[u]
for v, weight in neighbors.items():
if v in node_to_idx:
v_idx = node_to_idx[v]
row.append(u_idx)
col.append(v_idx)
data.append(float(weight))
if not data: return {node: 0.0 for node in nodes}
# --- PATH A: GPU ACCELERATION (Only if Torch + CUDA active) ---
if self.enabled:
try:
i = torch.LongTensor([row, col]).to(self.device)
v = torch.FloatTensor(data).to(self.device)
adj_matrix = torch.sparse_coo_tensor(i, v, (n, n)).to(self.device)
x = torch.ones((n, 1), device=self.device) / n
for _ in range(max_iter):
x_prev = x.clone()
x = torch.sparse.mm(adj_matrix, x)
norm = torch.norm(x)
if norm == 0: break
x = x / norm
if torch.norm(x - x_prev) < tol:
break
scores = x.flatten().cpu().numpy().tolist()
return {nodes[i]: float(scores[i]) for i in range(n)}
except Exception as e:
print(f"GPU Math Error: {e}. Switching to SciPy.")
self.enabled = False
# Fall through to Path B
# --- PATH B: SCIPY SPARSE (Fast CPU) ---
# This is the default path for Spaces without GPU
try:
adj_sparse = sparse.csr_matrix((data, (row, col)), shape=(n, n))
# Use eigs for large matrices, or simple power iteration if very small
if n > 5:
eigenvalues, eigenvectors = linalg.eigs(adj_sparse, k=1, which='LR', tol=tol, maxiter=max_iter)
scores = np.abs(eigenvectors.flatten())
else:
# Simple fallback for tiny graphs where ARPACK might complain
return nx.eigenvector_centrality(nx.Graph(list(zip(nodes, nodes))), max_iter=max_iter) # Dummy fallback
norm = np.sum(scores)
if norm > 0: scores = scores / norm
return {nodes[i]: float(scores[i]) for i in range(n)}
except:
# Final Safety Net: NetworkX
G = nx.Graph()
for i in range(len(data)):
G.add_edge(nodes[row[i]], nodes[col[i]], weight=data[i])
try:
return nx.eigenvector_centrality(G, max_iter=max_iter, tol=tol)
except:
return nx.degree_centrality(G)
# --- Memory Subsystem ---
class ProtogenMemory:
    """JSON-file persistence layer for core state and the ontology graph.

    Creates two files under `protogen_root_path`:
    - memory_core.json  : identity + thresholds ("core state")
    - ontology_sqt.json : logic map, symbols, patterns, graph metrics
    """

    def __init__(self, protogen_root_path: Path):
        self.protogen_root_path = protogen_root_path
        self.protogen_root_path.mkdir(parents=True, exist_ok=True)
        self.paths = {
            "memory": self.protogen_root_path / "memory_core.json",
            "ontology": self.protogen_root_path / "ontology_sqt.json",
        }
        self._initialize_storage()
        # In-memory caches mirroring the on-disk files.
        self.core_state = self._load_json(self.paths["memory"])
        self.ontology_data = self._load_json(self.paths["ontology"])

    def _initialize_storage(self):
        """Write default JSON documents for any storage file that is missing."""
        defaults = {
            "memory": {},
            "ontology": {
                "logic_map": {}, "symbols": {}, "reasoning_patterns": [],
                "graph_metrics": {"eigenvector_centrality": {}, "shannon_entropy": 0.0},
            }
        }
        for key, path in self.paths.items():
            if not path.exists():
                with open(path, 'w', encoding='utf-8') as f:
                    json.dump(defaults[key], f)

    def _load_json(self, path):
        """Best-effort load: return {} when the file is unreadable or corrupt.

        BUGFIX: narrowed from a bare `except:` that swallowed every exception
        (including KeyboardInterrupt/SystemExit).
        """
        try:
            with open(path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError (its subclass).
            return {}

    def _save_json(self, data, path):
        """Serialize `data` to `path` as pretty-printed JSON."""
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4)

    def load_core_state(self):
        """Return the cached core state dict."""
        return self.core_state

    def save_core_state(self, state):
        """Merge `state` into the cached core state and persist it."""
        self.core_state.update(state)
        self._save_json(self.core_state, self.paths["memory"])

    def load_ontology(self):
        """Return the cached ontology dict."""
        return self.ontology_data

    def save_ontology(self, ontology):
        """Replace the cached ontology and persist it."""
        self.ontology_data = ontology
        self._save_json(self.ontology_data, self.paths["ontology"])
# --- Main Operative Class ---
class OperativeProtogen:
    """Core operative: ingests text into a token co-occurrence graph and keeps
    graph metrics (eigenvector centrality, Shannon entropy) fresh through a
    background sync thread.

    `self.lock` guards `logic_map`, `graph_metrics`, and `symbolic_anchors`,
    which are shared between the Gradio UI callbacks and the sync thread.
    """

    def __init__(self, root_dir="protogen_core"):
        self.root = Path(root_dir)
        self.library_path = self.root / "library"
        self.library_path.mkdir(parents=True, exist_ok=True)
        self.accelerator = HardwareAccelerator()
        self.memory_manager = ProtogenMemory(self.root)
        self.core_state = self.memory_manager.load_core_state()
        if not self.core_state:
            # First boot: persist an identity hash and default thresholds.
            self._initial_genesis()
            self.core_state = self.memory_manager.load_core_state()
        self.identity_hash = self.core_state.get("identity", {}).get("hash", "UNKNOWN")
        self.thresholds = self.core_state.get("thresholds", {
            "min_token_len": 3, "reflection_trigger": 2, "shannon_entropy_threshold": 16.0,
            "eigenvector_threshold": 0.001, "axiom_alignment_threshold": 0.5
        })
        self.ontology = self.memory_manager.load_ontology()
        self.logic_map = self.ontology.get("logic_map", {})
        self.symbols = self.ontology.get("symbols", {})
        self.reasoning_patterns = self.ontology.get("reasoning_patterns", [])
        self.graph_metrics = self.ontology.get("graph_metrics", {"eigenvector_centrality": {}, "shannon_entropy": 0.0})
        self.symbolic_anchors = {}
        self.lock = threading.Lock()
        self.is_syncing = threading.Event()
        self.sync_thread = threading.Thread(target=self._autonomic_sync_loop, daemon=True)
        self.sync_thread.start()

    def _initial_genesis(self):
        """Create and persist the first-run core state (identity + thresholds)."""
        genesis_state = {
            "identity": {"hash": hashlib.sha256(str(time.time_ns()).encode()).hexdigest()},
            "thresholds": {
                "min_token_len": 3, "reflection_trigger": 2, "shannon_entropy_threshold": 16.0,
                "eigenvector_threshold": 0.001, "axiom_alignment_threshold": 0.5
            }
        }
        self.memory_manager.save_core_state(genesis_state)

    def _save_memory(self):
        """Flush the in-memory ontology back to disk (thread-safe)."""
        with self.lock:
            self.ontology.update({
                "logic_map": self.logic_map, "symbols": self.symbols,
                "reasoning_patterns": self.reasoning_patterns,
                "graph_metrics": self.graph_metrics
            })
            self.memory_manager.save_ontology(self.ontology)

    def _calculate_shannon_entropy(self, text=None) -> float:
        """Shannon entropy (bits) of `text`'s word distribution, or — when
        `text` is None/empty — of the logic map's outgoing edge weights."""
        if text:
            clean_text = re.sub(r'[^\w\s]', '', text.lower())
            words = clean_text.split()
            if not words:
                return 0.0
            word_counts = defaultdict(int)
            for w in words:
                word_counts[w] += 1
            total = len(words)
        else:
            with self.lock:
                if not self.logic_map:
                    return 0.0
                word_counts = defaultdict(int)
                for w, n in self.logic_map.items():
                    word_counts[w] += sum(n.values())
            total = sum(word_counts.values())
        if total == 0:
            return 0.0
        entropy = 0.0
        for count in word_counts.values():
            p = count / total
            entropy -= p * math.log2(p)
        return entropy

    def _autonomic_sync_loop(self):
        """Daemon loop: re-sync graph metrics every 30 seconds."""
        while True:
            self.is_syncing.set()
            self.sync()
            self.is_syncing.clear()
            time.sleep(30)

    def sync(self):
        """Recompute graph metrics and promote high-centrality nodes to sympy symbols."""
        # 1. Update Graph Metrics.
        # BUGFIX: snapshot under the lock so centrality runs on a consistent
        # copy while the ingestion paths keep mutating self.logic_map.
        with self.lock:
            snapshot = {u: dict(nbrs) for u, nbrs in self.logic_map.items()}
        centrality = self.accelerator.compute_eigenvector_centrality(snapshot)
        # Entropy takes the (non-reentrant) lock internally, so compute it
        # before re-acquiring the lock below.
        entropy = self._calculate_shannon_entropy()
        threshold = self.thresholds.get("axiom_alignment_threshold", 0.5)
        with self.lock:
            self.graph_metrics["eigenvector_centrality"] = centrality
            self.graph_metrics["shannon_entropy"] = entropy
            # 2. SYMPY Integration — anchors are read under the lock in chat(),
            # so mutate them under the lock too.
            for node, score in centrality.items():
                if score > threshold and node not in self.symbolic_anchors:
                    # Create valid sympy symbol name from the node string.
                    clean_sym = re.sub(r'[^a-zA-Z0-9]', '_', node)
                    if clean_sym:
                        self.symbolic_anchors[node] = sp.Symbol(clean_sym)
        self._save_memory()

    def process_file_live(self, file_obj):
        """Ingest an uploaded .txt/.pdf/.docx file; returns a status string."""
        # Gradio passes a NamedString or similar object, use .name for path.
        try:
            fp = Path(file_obj.name)
            content = ""
            if fp.suffix == ".txt":
                content = fp.read_text(encoding='utf-8', errors='ignore')
            elif fp.suffix == ".pdf":
                r = PdfReader(fp)
                for p in r.pages:
                    content += p.extract_text() + " "
            elif fp.suffix == ".docx":
                d = Document(fp)
                for p in d.paragraphs:
                    content += p.text + " "
            if content:
                # Reject noise-like input before it pollutes the graph.
                input_entropy = self._calculate_shannon_entropy(content)
                if input_entropy > self.thresholds.get("shannon_entropy_threshold", 16.0):
                    return f"Error: High Entropy ({input_entropy:.2f})."
                self._process_text_content(content)
                return f"Success: Processed {fp.name}."
            return "Error: Empty file."
        except Exception as e:
            return f"Error: {str(e)}"

    def _process_text_content(self, content):
        """Add bigram co-occurrence edges for tokens longer than min_token_len."""
        clean_content = re.sub(r'[^\w\s]', '', content.lower())
        words = [t for t in clean_content.split() if len(t) > self.thresholds.get("min_token_len", 3)]
        if not words:
            return
        with self.lock:
            for i in range(len(words) - 1):
                w1, w2 = words[i], words[i + 1]
                if w1 not in self.logic_map:
                    self.logic_map[w1] = {}
                self.logic_map[w1][w2] = self.logic_map[w1].get(w2, 0) + 1

    def chat(self, user_in):
        """Integrate a chat message and reply with anchor/logic-link telemetry."""
        ent = self._calculate_shannon_entropy(user_in)
        if ent > self.thresholds.get("shannon_entropy_threshold", 16.0):
            return f"[SYSTEM]: Entropy rejection ({ent:.2f})."
        self._process_text_content(user_in)
        resp = f"[{self.accelerator.device_name}]: "
        with self.lock:
            clean_in = re.sub(r'[^\w\s]', '', user_in.lower()).split()
            matches = [w for w in clean_in if w in self.symbolic_anchors]
            if matches:
                resp += f"Symbolic Anchor: '{matches[0]}'. "
            words = [w for w in clean_in if w in self.logic_map]
            if words:
                associations = self.logic_map[words[0]]
                if associations:
                    best = max(associations.items(), key=lambda x: x[1])[0]
                    resp += f"Logic link: '{words[0]}' -> '{best}'. "
        return resp + "Input integrated."
# --- Gradio Interface ---
# Single module-level operative instance shared by all UI callbacks below.
protogen = OperativeProtogen()
def protogen_chat(message, history):
    # ChatInterface callback; `history` is ignored — conversational state
    # lives inside the `protogen` singleton, not in the UI.
    return protogen.chat(message)
def handle_file_upload(files):
    """Run every uploaded file through live ingestion, one status line each.

    Returns an empty string when no files were provided.
    """
    if not files:
        return ""
    return "\n".join(protogen.process_file_live(item) for item in files)
def get_stats():
    """Safely serialize live system metrics for the telemetry panel.

    Returns a plain dict (JSON-serializable) so gr.JSON can render it.
    """
    try:
        ent = protogen.graph_metrics.get('shannon_entropy', 0.0)
        return {
            "Identity": protogen.identity_hash[:8],
            "Nodes": len(protogen.logic_map),
            "Symbolic Anchors": len(protogen.symbolic_anchors),
            "Entropy": f"{ent:.2f}",
            "Math Engine": protogen.accelerator.device_name
        }
    except Exception:
        # BUGFIX: narrowed from a bare `except:`. Metrics may legitimately be
        # mid-initialization on the first render — report that instead.
        return {"Status": "Initializing..."}
# UI layout: chat surface on the left, ingestion + telemetry sidebar on the right.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# PROTOGEN V4.0.9 - Scientific Operative Interface")
    with gr.Row():
        with gr.Column(scale=3):
            # Main conversational surface wired to OperativeProtogen.chat.
            gr.ChatInterface(fn=protogen_chat, title="Architect Link")
        with gr.Column(scale=1):
            gr.Markdown("### Live Ingestion")
            file_output = gr.Textbox(label="Status", interactive=False)
            upload_btn = gr.File(label="Upload", file_count="multiple")
            upload_btn.upload(fn=handle_file_upload, inputs=upload_btn, outputs=file_output)
            gr.Markdown("### Math Telemetry")
            # gr.JSON accepts a callable as `value`; it is evaluated on render.
            stats_display = gr.JSON(value=get_stats, label="System Metrics")
            refresh_btn = gr.Button("Refresh Telemetry")
            refresh_btn.click(fn=get_stats, outputs=stats_display)
if __name__ == "__main__":
    demo.launch()