Spaces:

KingOfThoughtFleuren
/

Protogen

Runtime error

File size: 21,092 Bytes

import spaces  # MUST be first — ZeroGPU requires spaces imported before any CUDA init

# --- Fix: asyncio GC cleanup bug on Python 3.10 + Linux containers ---
import asyncio.base_events as _abe
# Keep a handle on the stock finalizer so the wrapper below can delegate to it.
_orig_del = _abe.BaseEventLoop.__del__


def _safe_del(self):
    """Call the original ``BaseEventLoop.__del__`` and discard any ``Exception``.

    Best-effort by design: a failure while an event loop is being finalized
    is ignored instead of propagating out of the finalizer.
    """
    try:
        _orig_del(self)
    except Exception:
        pass


# Install the tolerant finalizer for every event loop created afterwards.
_abe.BaseEventLoop.__del__ = _safe_del
# --- End fix ---

import time
import hashlib
import json
import re
import os
import tempfile
from pathlib import Path
from collections import defaultdict
import math
import threading

import gradio as gr
import networkx as nx
import numpy as np
from scipy import sparse
from scipy.sparse import linalg
import sympy as sp

try:
    from pypdf import PdfReader
    from docx import Document
except ImportError as e:
    print(f"[WARNING]: Missing dependencies for file parsing: {e}")

try:
    import torch
    HAS_TORCH = True
except ImportError:
    HAS_TORCH = False


@spaces.GPU
def _gpu_power_iteration(row, col, data, n, max_iter=1000, tol=1e-06):
    """Run power iteration on a sparse ``n x n`` matrix on the CUDA device.

    Args:
        row, col: parallel lists of entry coordinates (COO layout).
        data: list of entry values, parallel to ``row``/``col``.
        n: matrix dimension.
        max_iter: upper bound on the number of multiply/normalise rounds.
        tol: stop once the L2 distance between successive iterates drops
            below this value.

    Returns:
        A plain Python list of ``n`` floats: the final iterate. If a product
        ever has zero norm, that all-zero product is what is returned.
    """
    import torch

    cuda = torch.device("cuda")
    coords = torch.LongTensor([row, col]).to(cuda)
    values = torch.FloatTensor(data).to(cuda)
    adjacency = torch.sparse_coo_tensor(coords, values, (n, n)).to(cuda)

    # Start from the uniform vector 1/n.
    vec = torch.ones((n, 1), device=cuda) / n
    step = 0
    while step < max_iter:
        step += 1
        previous = vec.clone()
        vec = torch.sparse.mm(adjacency, vec)
        magnitude = torch.norm(vec)
        if magnitude == 0:
            # Nothing to normalise; leave the zero product as the result.
            break
        vec = vec / magnitude
        if torch.norm(vec - previous) < tol:
            break

    return vec.flatten().cpu().numpy().tolist()


class HardwareAccelerator:
    """Computes eigenvector centrality, preferring GPU, then SciPy, then NetworkX.

    Attributes:
        device_name: human-readable label of the engine selected at start-up.
        enabled: True while the CUDA path is considered usable; it is switched
            off permanently after the first GPU failure.
    """

    def __init__(self):
        self.device_name = "CPU (SciPy Optimized)"
        self.enabled = False
        if HAS_TORCH and torch.cuda.is_available():
            try:
                self.device_name = f"NVIDIA GPU (CUDA) - {torch.cuda.get_device_name(0)}"
                self.enabled = True
            except Exception as exc:
                # Device query failed: stay on the CPU paths.
                print(f"[WARNING]: CUDA device query failed: {exc}")
                self.enabled = False

    @staticmethod
    def _networkx_scores(nodes, row, col, data, tol, max_iter):
        """Return per-node scores from NetworkX, degrading gracefully.

        Tries eigenvector centrality, then degree centrality, and finally a
        uniform ``1/n`` vector when NetworkX cannot be used at all. The graph
        is undirected and the centrality calls do not pass a weight key,
        exactly as before.
        """
        uniform = [1.0 / len(nodes)] * len(nodes)
        try:
            graph = nx.Graph()
            for r, c, w in zip(row, col, data):
                graph.add_edge(nodes[r], nodes[c], weight=w)
        except Exception as exc:
            print(f"NetworkX graph build failed: {exc}. Using uniform scores.")
            return uniform

        try:
            cent = nx.eigenvector_centrality(graph, max_iter=max_iter, tol=tol)
        except Exception as exc:
            print(f"NetworkX eigenvector centrality failed: {exc}. Using degree centrality.")
            try:
                cent = nx.degree_centrality(graph)
            except Exception as exc2:
                print(f"NetworkX degree centrality failed: {exc2}. Using uniform scores.")
                return uniform

        # Nodes without any in-vocabulary edge are absent from the graph.
        return [cent.get(node, 0.0) for node in nodes]

    def compute_eigenvector_centrality(self, logic_map, tol=1e-06, max_iter=1000):
        """Score every key of ``logic_map`` by eigenvector centrality.

        Args:
            logic_map: ``{node: {neighbor: weight}}``. Only neighbors that are
                themselves keys of ``logic_map`` contribute edges.
            tol: convergence tolerance forwarded to the numeric back-ends.
            max_iter: iteration cap forwarded to the numeric back-ends.

        Returns:
            ``{node: score}`` with scores divided by the maximum score (when
            that maximum is positive), so every back-end reports on the same
            scale. ``{}`` for an empty map; all zeros when there are no edges.
        """
        if not logic_map:
            return {}

        nodes = list(logic_map.keys())
        node_to_idx = {node: i for i, node in enumerate(nodes)}
        n = len(nodes)

        # Build the adjacency in COO form, dropping edges to unknown nodes.
        row, col, data = [], [], []
        for u, neighbors in logic_map.items():
            u_idx = node_to_idx[u]
            for v, weight in neighbors.items():
                v_idx = node_to_idx.get(v)
                if v_idx is None:
                    continue
                row.append(u_idx)
                col.append(v_idx)
                data.append(float(weight))

        if not data:
            return {node: 0.0 for node in nodes}

        scores = None

        # PATH A: GPU
        if self.enabled:
            try:
                scores = _gpu_power_iteration(row, col, data, n, max_iter=max_iter, tol=tol)
            except Exception as e:
                print(f"GPU Math Error: {e}. Switching to SciPy.")
                self.enabled = False

        # PATH B: SciPy. Graphs with n <= 5 skip this path and go to NetworkX
        # (presumably because ARPACK needs k to be well below n — confirm).
        if scores is None and n > 5:
            try:
                adj_sparse = sparse.csr_matrix((data, (row, col)), shape=(n, n))
                _, eigenvectors = linalg.eigs(adj_sparse, k=1, which='LR', tol=tol, maxiter=max_iter)
                # eigs may return complex vectors; keep the magnitudes.
                scores = np.abs(eigenvectors.flatten()).tolist()
            except Exception as exc:
                print(f"SciPy Math Error: {exc}. Switching to NetworkX.")
                scores = None

        # PATH C: NetworkX (with its own internal fallbacks)
        if scores is None:
            scores = self._networkx_scores(nodes, row, col, data, tol, max_iter)

        # --- Normalize to [0, 1] max-norm so all paths are on the same scale ---
        scores = [float(s) for s in scores]
        max_score = max(scores) if scores else 1.0
        if max_score > 0:
            scores = [s / max_score for s in scores]

        return dict(zip(nodes, scores))


class ProtogenMemory:
    """JSON-file persistence for the core state and the ontology.

    Two files live under ``protogen_root_path``: ``memory_core.json`` (core
    state) and ``ontology_sqt.json`` (ontology). Both are created with
    defaults when missing and are loaded once at construction time.
    """

    def __init__(self, protogen_root_path: Path):
        self.protogen_root_path = protogen_root_path
        self.protogen_root_path.mkdir(parents=True, exist_ok=True)
        self.paths = {
            "memory":  self.protogen_root_path / "memory_core.json",
            "ontology": self.protogen_root_path / "ontology_sqt.json",
        }
        self._initialize_storage()
        self.core_state    = self._load_json(self.paths["memory"])
        self.ontology_data = self._load_json(self.paths["ontology"])

    def _initialize_storage(self):
        """Create each storage file with its default payload if it is absent."""
        defaults = {
            "memory": {},
            "ontology": {
                "logic_map": {}, "symbols": {}, "reasoning_patterns": [],
                "recursive_patterns": [],
                "graph_metrics": {"eigenvector_centrality": {}, "shannon_entropy": 0.0},
            }
        }
        for key, path in self.paths.items():
            if not path.exists():
                with open(path, 'w', encoding='utf-8') as f:
                    json.dump(defaults[key], f)

    def _load_json(self, path):
        """Return the JSON object stored at ``path``, or ``{}`` if unusable.

        Unreadable files, invalid JSON and top-level values that are not
        objects all yield ``{}`` (with a warning), because every caller
        treats the result as a dict.
        """
        try:
            with open(path, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"[WARNING]: Could not load {path}: {exc}")
            return {}
        if not isinstance(loaded, dict):
            print(f"[WARNING]: Ignoring non-object JSON in {path}")
            return {}
        return loaded

    def _save_json(self, data, path):
        """Write ``data`` as JSON to ``path`` atomically.

        Raises:
            Exception: whatever serialisation or the filesystem raised; the
                temporary file is removed first and the target is untouched.
        """
        # Atomic write — temp file + os.replace() prevents corruption on crash
        path = Path(path)
        dirpath = str(path.parent)
        os.makedirs(dirpath, exist_ok=True)
        fd, tmp = tempfile.mkstemp(prefix=".tmp_", dir=dirpath)
        try:
            with os.fdopen(fd, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=4)
                f.flush()
                os.fsync(f.fileno())
            os.replace(tmp, path)
        except Exception:
            # Best-effort cleanup; never let it mask the original error.
            try:
                os.remove(tmp)
            except OSError:
                pass
            raise

    def load_core_state(self):
        """Return the in-memory core state dict (not a copy)."""
        return self.core_state

    def save_core_state(self, state):
        """Merge ``state`` into the core state and persist the result."""
        self.core_state.update(state)
        self._save_json(self.core_state, self.paths["memory"])

    def load_ontology(self):
        """Return the in-memory ontology dict (not a copy)."""
        return self.ontology_data

    def save_ontology(self, ontology):
        """Replace the ontology with ``ontology`` and persist it."""
        self.ontology_data = ontology
        self._save_json(self.ontology_data, self.paths["ontology"])


class OperativeProtogen:
    """Word-association engine with persistent state and a background sync.

    Text is tokenised into a weighted bigram graph (``logic_map``). From it
    the class derives "IF A THEN B" reasoning patterns, two-step
    "(RECURSIVE)" patterns, centrality-based anchors, and canned responses.
    A daemon thread re-runs :meth:`sync` every 30 seconds.

    ``self.lock`` is a plain, non-reentrant ``threading.Lock``: helpers whose
    docstring says "caller must hold the lock" must never acquire it.
    """

    # Used when the persisted core state carries no "thresholds" entry.
    _DEFAULT_THRESHOLDS = {
        "min_token_len": 3,
        "reflection_trigger": 2,
        # Entropy: normal prose is ~3-5 bits. Gate at 6.0 to block actual noise/garbage.
        "shannon_entropy_threshold": 6.0,
        "eigenvector_threshold": 0.001,
        # Anchors: nodes scoring at least 0.3 after centrality is max-normed to [0, 1].
        "axiom_alignment_threshold": 0.3,
    }

    def __init__(self, root_dir="/data/protogen_core"):
        self.root = Path(root_dir)
        self.library_path = self.root / "library"
        self.library_path.mkdir(parents=True, exist_ok=True)

        self.accelerator    = HardwareAccelerator()
        self.memory_manager = ProtogenMemory(self.root)
        self.core_state     = self.memory_manager.load_core_state()
        if not self.core_state:
            # First run (or unreadable state): mint an identity and defaults.
            self._initial_genesis()
            self.core_state = self.memory_manager.load_core_state()

        self.identity_hash = self.core_state.get("identity", {}).get("hash", "UNKNOWN")
        self.thresholds = self.core_state.get("thresholds", dict(self._DEFAULT_THRESHOLDS))

        self.ontology            = self.memory_manager.load_ontology()
        self.logic_map           = self.ontology.get("logic_map", {})
        self.symbols             = self.ontology.get("symbols", {})
        self.reasoning_patterns  = self.ontology.get("reasoning_patterns", [])
        self.recursive_patterns  = self.ontology.get("recursive_patterns", [])
        self.graph_metrics       = self.ontology.get("graph_metrics", {"eigenvector_centrality": {}, "shannon_entropy": 0.0})

        self.symbolic_anchors    = {}
        self.axiomatic_anchors   = []
        self.conversation_history = []
        self.lock       = threading.Lock()
        self.is_syncing = threading.Event()

        self.sync_thread = threading.Thread(target=self._autonomic_sync_loop, daemon=True)
        self.sync_thread.start()

    def _initial_genesis(self):
        """Persist a fresh identity hash and the default thresholds."""
        self.memory_manager.save_core_state({
            "identity": {"hash": hashlib.sha256(str(time.time_ns()).encode()).hexdigest()},
            "thresholds": dict(self._DEFAULT_THRESHOLDS),
        })

    def _save_memory(self):
        """Copy the live structures into the ontology and persist it."""
        with self.lock:
            self.ontology.update({
                "logic_map": self.logic_map, "symbols": self.symbols,
                "reasoning_patterns": self.reasoning_patterns,
                "recursive_patterns": self.recursive_patterns,
                "graph_metrics": self.graph_metrics
            })
            self.memory_manager.save_ontology(self.ontology)

    @staticmethod
    def _entropy_from_counts(counts) -> float:
        """Return the Shannon entropy (bits) of a ``{key: count}`` mapping."""
        total = sum(counts.values())
        if total <= 0:
            return 0.0
        entropy = 0.0
        for count in counts.values():
            if count <= 0:
                # A zero count contributes nothing and log2(0) would raise.
                continue
            p = count / total
            entropy -= p * math.log2(p)
        return entropy

    def _graph_weight_counts(self):
        """Return ``{node: total outgoing weight}``. Caller must hold the lock."""
        return {node: sum(neighbors.values()) for node, neighbors in self.logic_map.items()}

    def _calculate_shannon_entropy(self, text=None) -> float:
        """Return word-level entropy of ``text``, or of the graph when ``None``.

        With ``text`` given (including the empty string) the distribution is
        the frequency of its lower-cased, punctuation-stripped words. With
        ``text=None`` it is each node's total outgoing weight; this branch
        acquires ``self.lock`` and so must not be called while holding it.
        """
        if text is not None:
            counts = defaultdict(int)
            for word in re.sub(r'[^\w\s]', '', text.lower()).split():
                counts[word] += 1
        else:
            with self.lock:
                counts = self._graph_weight_counts()
        return self._entropy_from_counts(counts)

    def _autonomic_sync_loop(self):
        """Run :meth:`sync` forever, 30 s apart, surviving individual failures."""
        while True:
            self.is_syncing.set()
            try:
                self.sync()
            except Exception as exc:
                # One failed pass must not kill the daemon thread.
                print(f"[WARNING]: Sync pass failed: {exc}")
            finally:
                self.is_syncing.clear()
            time.sleep(30)

    @staticmethod
    def _split_pattern(pattern):
        """Split "IF A THEN B" into ``(A, B)``; ``None`` if no " THEN "."""
        if " THEN " not in pattern:
            return None
        head, tail = pattern.split(" THEN ", 1)
        return head.replace("IF ", "").strip(), tail.strip()

    def _refresh_patterns(self):
        """Derive new base and recursive patterns. Caller must hold the lock.

        Base: for each node whose strongest neighbor has weight at or above
        ``reflection_trigger``, add "IF NODE THEN NEIGHBOR". Recursive: for
        every ordered pair of distinct base patterns A->B, B->C, add
        "IF A THEN C (RECURSIVE)". Only new patterns are appended, in the
        order a nested scan over the pattern list would find them.
        """
        trigger = self.thresholds.get("reflection_trigger", 2)
        known = set(self.reasoning_patterns)
        for concept, neighbors in list(self.logic_map.items()):
            if not neighbors:
                continue
            target, weight = max(neighbors.items(), key=lambda kv: kv[1])
            if weight >= trigger:
                pattern = f"IF {concept.upper()} THEN {target.upper()}"
                if pattern not in known:
                    known.add(pattern)
                    self.reasoning_patterns.append(pattern)

        # Index patterns by antecedent so chaining is a lookup, not a scan.
        parsed = [self._split_pattern(p) for p in self.reasoning_patterns]
        by_antecedent = defaultdict(list)
        for idx, parts in enumerate(parsed):
            if parts is not None:
                by_antecedent[parts[0]].append((idx, parts[1]))

        known_recursive = set(self.recursive_patterns)
        for idx, parts in enumerate(parsed):
            if parts is None:
                continue
            antecedent, consequent = parts
            for other_idx, other_consequent in by_antecedent.get(consequent, ()):
                if other_idx == idx:
                    continue
                chained = f"IF {antecedent} THEN {other_consequent} (RECURSIVE)"
                if chained not in known_recursive:
                    known_recursive.add(chained)
                    self.recursive_patterns.append(chained)

    def sync(self):
        """Recompute graph metrics, anchors and patterns, then persist."""
        with self.lock:
            # Copy one level deep: the centrality pass runs without the lock,
            # and the inner dicts are mutated by concurrent learning.
            map_copy = {node: dict(neighbors) for node, neighbors in self.logic_map.items()}

        centrality = self.accelerator.compute_eigenvector_centrality(map_copy)

        with self.lock:
            self.graph_metrics["eigenvector_centrality"] = centrality
            # Computed inline: _calculate_shannon_entropy() would re-acquire
            # the non-reentrant lock held here and deadlock.
            self.graph_metrics["shannon_entropy"] = self._entropy_from_counts(
                self._graph_weight_counts()
            )

            threshold = self.thresholds.get("axiom_alignment_threshold", 0.3)
            for node, score in centrality.items():
                if score >= threshold:
                    if node not in self.symbolic_anchors:
                        clean_sym = re.sub(r'[^a-zA-Z0-9]', '_', node)
                        if clean_sym:
                            self.symbolic_anchors[node] = sp.Symbol(clean_sym)
                    if node not in self.axiomatic_anchors:
                        self.axiomatic_anchors.append(node)

            self._refresh_patterns()

        self._save_memory()

    def process_file_live(self, file_obj):
        """Ingest one uploaded ``.txt``/``.pdf``/``.docx`` file.

        Args:
            file_obj: an object with a ``name`` attribute holding the path,
                or the path itself (``str`` / ``Path``).

        Returns:
            A status line starting with "Success:", "Warning:" or "Error:".
            High-entropy content is still learned; only a warning is returned.
        """
        try:
            fp = Path(getattr(file_obj, "name", file_obj))
            suffix = fp.suffix.lower()
            if suffix == ".txt":
                content = fp.read_text(encoding='utf-8', errors='ignore')
            elif suffix == ".pdf":
                # extract_text() may yield None for pages without a text layer.
                content = " ".join((p.extract_text() or "") for p in PdfReader(fp).pages)
            elif suffix == ".docx":
                content = " ".join(p.text for p in Document(fp).paragraphs)
            else:
                return f"Error: Unsupported file type '{fp.suffix}' for {fp.name}."

            if not content.strip():
                return "Error: Empty file."

            entropy = self._calculate_shannon_entropy(content)
            self._process_text_content(content)
            if entropy > self.thresholds.get("shannon_entropy_threshold", 6.0):
                return f"Warning: High entropy ({entropy:.2f}) in {fp.name} — processed anyway."
            return f"Success: Processed {fp.name}."
        except Exception as e:
            return f"Error: {str(e)}"

    def _content_tokens(self, text):
        """Return lower-cased words of ``text`` longer than ``min_token_len``."""
        min_len = self.thresholds.get("min_token_len", 3)
        return [t for t in re.sub(r'[^\w\s]', '', text.lower()).split() if len(t) > min_len]

    def _process_text_content(self, content):
        """Add every adjacent token pair of ``content`` to ``logic_map``."""
        words = self._content_tokens(content)
        if not words: return
        with self.lock:
            for w1, w2 in zip(words, words[1:]):
                followers = self.logic_map.setdefault(w1, {})
                followers[w2] = followers.get(w2, 0) + 1

    def _immediate_learn(self, text: str, source: str = "chat"):
        """Learn ``text`` and refresh patterns right away.

        ``source`` is accepted for callers that label the origin of the
        text; it is not used here.
        """
        self._process_text_content(text)
        with self.lock:
            self._refresh_patterns()

    def _chain_patterns(self, token: str, depth: int = 4) -> list:
        """Follow patterns from ``token`` for up to ``depth`` hops.

        Recursive patterns are searched before base patterns, and the first
        pattern whose antecedent matches wins at each hop. Stops on a repeat.

        Returns:
            A list of ``(antecedent, consequent)`` upper-case pairs.
        """
        all_patterns = self.recursive_patterns + self.reasoning_patterns
        chain, current, visited = [], token.upper(), set()
        for _ in range(depth):
            if current in visited: break
            visited.add(current)
            found = None
            for p in all_patterns:
                if " THEN " not in p: continue
                ant, con = p.split(" THEN ", 1)
                ant_word = ant.replace("IF ", "").replace(" (RECURSIVE)", "").strip()
                con_word = con.replace(" (RECURSIVE)", "").strip()
                if ant_word == current:
                    found = (ant_word, con_word)
                    break
            if found is None:
                break
            chain.append(found)
            current = found[1]
        return chain

    def _generate_response(self, user_in: str) -> str:
        """Build a reply from the strongest knowledge about the input tokens.

        Layers are tried in order and the first hit is returned: multi-step
        chain, single pattern, symbolic anchor, raw weighted association,
        and finally an "unknown" message.
        """
        centrality = self.graph_metrics.get("eigenvector_centrality", {})
        tokens     = self._content_tokens(user_in)

        if not tokens:
            return "I have no concepts to work with yet. Keep talking — I'm building."

        # Rank by centrality score; anchors always float to top
        ranked = sorted(
            tokens,
            key=lambda t: (1 if t in self.axiomatic_anchors else 0, centrality.get(t, 0.0)),
            reverse=True
        )

        # Layer 1: multi-step recursive chain
        for token in ranked:
            chain = self._chain_patterns(token, depth=4)
            if len(chain) >= 2:
                steps = " → ".join(a.lower() for a, _ in chain) + f" → {chain[-1][1].lower()}"
                note  = " (axiomatic)" if token in self.axiomatic_anchors else ""
                return f"From '{token}'{note}, my reasoning chains: {steps}."

        # Layer 2: single base pattern
        for token in ranked:
            chain = self._chain_patterns(token, depth=1)
            if chain:
                a, c = chain[0]
                note = " (core concept)" if token in self.axiomatic_anchors else ""
                return f"From '{a.lower()}'{note}, I reason toward '{c.lower()}'."

        # Layer 3: symbolic anchor
        for token in ranked:
            if token in self.symbolic_anchors:
                related = [k for k in self.symbolic_anchors if k != token][:3]
                extra = f" Related: {', '.join(related)}." if related else ""
                return f"'{token}' is a symbolic anchor.{extra}"

        # Layer 4: raw weighted association
        best_token = best_assoc = None
        best_weight = 0.0
        for token in ranked:
            followers = self.logic_map.get(token)
            if followers:
                # Tokens missing from the centrality table get a 0.01 floor.
                w = centrality.get(token, 0.01) * max(followers.values())
                if w > best_weight:
                    best_weight = w
                    best_token  = token
                    best_assoc  = max(followers.items(), key=lambda x: x[1])[0]

        if best_token:
            note = " (axiomatic)" if best_token in self.axiomatic_anchors else ""
            return f"'{best_token}'{note} most strongly connects to '{best_assoc}'."

        # Layer 5: honest unknown
        return f"No associations yet for '{ranked[0]}'. Keep talking — every word builds my understanding."

    def chat(self, user_in: str) -> str:
        """Learn from ``user_in``, answer it, and learn from the answer too.

        Input whose word entropy exceeds ``shannon_entropy_threshold`` is
        rejected with a "[SYSTEM]" message and is neither learned nor logged.
        """
        entropy = self._calculate_shannon_entropy(user_in)
        if entropy > self.thresholds.get("shannon_entropy_threshold", 6.0):
            return f"[SYSTEM]: Input rejected — entropy {entropy:.2f} exceeds threshold."

        self.conversation_history.append({"role": "Architect", "text": user_in})
        self._immediate_learn(user_in, source="chat_input")
        response = self._generate_response(user_in)
        self._immediate_learn(response, source="self_reasoning")
        self.conversation_history.append({"role": "Protogen", "text": response})
        return response


# --- Gradio Interface ---
# Module-level singleton shared by every Gradio callback below. Constructing it
# creates the storage directory under the default root ("/data/protogen_core")
# and starts the daemon thread that runs sync() in the background.
protogen = OperativeProtogen()

def protogen_chat(message, history):
    """Chat callback: forward ``message`` to the shared Protogen instance.

    ``history`` is supplied by the chat UI and is ignored here.
    """
    reply = protogen.chat(message)
    return reply

def handle_file_upload(files):
    """Ingest each uploaded file and return one status line per file.

    Returns an empty string when nothing was uploaded.
    """
    if not files:
        return ""
    reports = [protogen.process_file_live(item) for item in files]
    return "\n".join(reports)

def get_stats():
    """Return a telemetry snapshot of the shared Protogen instance.

    Returns:
        A dict for the JSON panel: identity prefix, graph/pattern/anchor
        counts, graph entropy (two decimals), the three highest-centrality
        nodes and the math engine label. If anything fails while reading
        the state, ``{"Status": "Initializing..."}`` is returned instead.
    """
    try:
        centrality = protogen.graph_metrics.get("eigenvector_centrality", {})
        top_nodes  = sorted(centrality.items(), key=lambda x: x[1], reverse=True)[:3]
        return {
            "Identity":         protogen.identity_hash[:8],
            "Nodes":            len(protogen.logic_map),
            "Patterns":         len(protogen.reasoning_patterns),
            "Recursive":        len(protogen.recursive_patterns),
            "Anchors":          len(protogen.axiomatic_anchors),
            "Entropy":          f"{protogen.graph_metrics.get('shannon_entropy', 0.0):.2f}",
            "Top Concepts":     [n for n, _ in top_nodes],
            "Math Engine":      protogen.accelerator.device_name,
        }
    except Exception:
        # Best-effort panel: report a placeholder rather than break the UI,
        # but let KeyboardInterrupt/SystemExit through (a bare except would not).
        return {"Status": "Initializing..."}

# UI layout: a wide chat column (scale 3) next to a narrow side panel (scale 1)
# holding file ingestion and the telemetry read-out.
with gr.Blocks() as demo:
    gr.Markdown("# PROTOGEN V4.0.9 - Scientific Operative Interface")
    with gr.Row():
        with gr.Column(scale=3):
            # Chat widget; every message is answered by protogen_chat.
            gr.ChatInterface(fn=protogen_chat, title="Architect Link")
        with gr.Column(scale=1):
            gr.Markdown("### Live Ingestion")
            file_output = gr.Textbox(label="Status", interactive=False)
            upload_btn  = gr.File(label="Upload", file_count="multiple")
            # On upload, ingest the files and show one status line per file.
            upload_btn.upload(fn=handle_file_upload, inputs=upload_btn, outputs=file_output)
            gr.Markdown("### Math Telemetry")
            # NOTE(review): value is the function itself, not its result —
            # presumably Gradio calls it on page load; confirm for the pinned version.
            stats_display = gr.JSON(value=get_stats, label="System Metrics")
            refresh_btn   = gr.Button("Refresh Telemetry")
            # Manual refresh: re-run get_stats and replace the JSON panel.
            refresh_btn.click(fn=get_stats, outputs=stats_display)

# Start the web server only when executed as a script, not when imported.
if __name__ == "__main__":
    # NOTE(review): `theme` is passed to launch() rather than gr.Blocks();
    # which of the two accepts it depends on the Gradio major version —
    # confirm against the version pinned for this Space.
    demo.launch(theme=gr.themes.Soft(), ssr_mode=False)