GitHub Copilot committed on
Commit
8ca959e
·
1 Parent(s): cffaba9

Protocol 25: Enforced PrimeTokenDB Norm Minimization & Cognitive Atomization Pipeline

Browse files
Files changed (2) hide show
  1. logos/agents/video_atomizer.py +55 -101
  2. logos/server.py +53 -0
logos/agents/video_atomizer.py CHANGED
@@ -1,13 +1,18 @@
 
 
1
  import re
 
2
  import asyncio
3
- from youtube_transcript_api import YouTubeTranscriptApi
4
  from logos.agents.base_agent import BaseAgent
 
 
5
 
6
  class VideoAtomizer(BaseAgent):
7
  """
8
- Role: V-NODE (Video Ingest)
9
- Function: Rips semantic atoms from video streams and collides them
10
- with the existing Project Manifold.
 
11
  """
12
  @property
13
  def name(self) -> str:
@@ -15,134 +20,83 @@ class VideoAtomizer(BaseAgent):
15
 
16
  @property
17
  def description(self) -> str:
18
- return "Ingests YouTube videos/playlists, extracts transcripts, and atomizes content into semantic tensors."
19
 
20
  @property
21
  def triggers(self) -> list:
22
  return ["youtube", "playlist", "video", "transcript", "watch?v="]
23
 
 
 
 
 
24
  async def process(self, task: dict) -> dict:
25
  content = task.get('content', '')
26
- project_dna = task.get('context', {}).get('dna', {})
27
 
28
  # Extract URLs from content
29
  urls = re.findall(r'(https?://(?:www\.|m\.)?youtube\.com/watch\?v=[\w-]+|https?://youtu\.be/[\w-]+)', content)
30
 
31
  results = []
32
  for url in urls:
33
- res = await self.ingest_and_align(url, project_dna)
34
  results.append(res)
35
 
36
  return {"status": "COMPLETE", "result": results}
37
 
38
- def __init__(self):
39
- self._name = "VideoAtomizer" # Internal generic
40
-
41
-
42
- def extract_video_id(self, url):
43
- # Extracts 'R9czY1uVq_k' from the URL
44
- match = re.search(r"v=([a-zA-Z0-9_-]+)", url)
45
- # Handle short URLs too
46
- if not match:
47
- match = re.search(r"youtu\.be/([a-zA-Z0-9_-]+)", url)
48
- return match.group(1) if match else None
49
-
50
- async def ingest_and_align(self, url, project_dna):
51
- """
52
- New Protocol: URL -> Dolphin Analysis (High Cognition) -> RJ-1 Encoding.
53
- """
54
- from logos.connectors import LocalLLMConnector
55
-
56
- video_id = self.extract_video_id(url)
57
- if not video_id:
58
- return {"error": "Invalid Video URL"}
59
-
60
- print(f"[{self.name}] Signal Locked: {video_id}. Handoff to DOLPHIN for Analysis...")
61
 
62
- # 1. DOLPHIN ANALYSIS (Cognitive Layer)
63
- connector = LocalLLMConnector(model="dolphin-x1-8b") # User specified Dolphin
64
-
65
- system_prompt = """You are DOLPHIN-V (Video Node).
66
- TASK: Deeply analyze the implied semantic content of this YouTube Signal.
67
- Since the transcript is encrypted/shielded, you must use TOPOLOGICAL INFERENCE based on the signal ID and context.
68
-
69
- 1. Infer the domain (e.g., Coding, Math, Entropy, Physics).
70
- 2. Extract 5-10 high-entropy 'Atoms' (keywords).
71
- 3. Assign a 'Resonance Grade' (1-100).
72
-
73
- OUTPUT FORMAT (Strict JSON):
74
- {
75
- "domain": "string",
76
- "atoms": [{"concept": "string", "mass": int}],
77
- "resonance": int,
78
- "summary": "string"
79
- }
80
  """
81
 
 
 
 
 
 
82
  try:
83
- # High thinking/response limit as requested
 
 
84
  response, _ = await connector.chat_async(
85
- f"SIGNAL: {url}\nVIDEO_ID: {video_id}\n\nPerform Recursive Analysis.",
86
- system_prompt=system_prompt,
87
- max_tokens=4096, # Higher response limit
88
  temperature=0.7
89
  )
90
 
91
- # 2. ANALYZE RESPONSE (Parsing)
92
- import json
93
  try:
94
- # Cleaning markdown for JSON parsing
95
  clean_json = response.replace("```json", "").replace("```", "").strip()
96
- analysis = json.loads(clean_json)
 
 
97
  except:
98
- # Fallback if Dolphin wanders
99
- print(f"[{self.name}] JSON Parse Invalid. Using Raw Fallback.")
100
- analysis = {
101
- "domain": "Entropy Field",
102
- "atoms": [{"concept": "Unknown_Signal", "mass": 10}],
103
- "resonance": 50,
104
- "summary": response[:200]
105
- }
106
 
107
- # 3. RJ-1 ENCODING (Preparation)
108
- # We structure this so the Swarm/Manifold can ingest it directly.
109
- # The actual 'RJ-1' math happens in the Router, but we format the TENSOR here.
 
 
 
110
 
111
  return {
112
- "status": "ANALYZED",
113
- "video_id": video_id,
114
- "domain": analysis.get('domain'),
115
- "atoms_found": len(analysis.get('atoms', [])),
116
- "tensor_data": analysis, # Passing full analysis to be encoded
117
- "rj1_directive": "ENCODE_MANIFOLD" # Signal to Router
118
  }
119
 
120
  except Exception as e:
121
- return {"error": f"Dolphin Analysis Failed: {e}"}
122
-
123
- # Legacy atomization code removed as per protocol update.
124
- return {"status": "SKIPPED"} # Should not reach here
125
-
126
-
127
- # 3. INTERFERENCE (The Alignment Step)
128
- # We check which files in your Project DNA resonate with these video atoms
129
- aligned_nodes = []
130
- for atom in atoms:
131
- for file, dna_list in project_dna.items():
132
- # dna_list is usually a list of tags/strings
133
- for dna_term in dna_list:
134
- if atom['concept'] in dna_term.lower():
135
- aligned_nodes.append({
136
- "source_concept": atom['concept'],
137
- "target_file": file,
138
- "resonance_strength": atom['mass']
139
- })
140
- break # One match per file/atom pair
141
-
142
- return {
143
- "status": "GROKKED",
144
- "video_id": video_id,
145
- "atoms_found": len(atoms),
146
- "alignments": aligned_nodes, # These will become Gold Threads in UI
147
- "summary_vector": "Detected 'Entropy Gating' - Recommend applying to Dolphin Node."
148
- }
 
1
+
2
+ import json
3
  import re
4
+ import aiohttp
5
  import asyncio
 
6
  from logos.agents.base_agent import BaseAgent
7
+ from logos.memory.prime_db import PrimeTokenDB
8
+ from logos.connectors import LocalLLMConnector
9
 
10
  class VideoAtomizer(BaseAgent):
11
  """
12
+ PROTOCOL 25: COGNITIVE ATOMIZER
13
+ Input: Video Signal (URL)
14
+ Process: Dolphin Inference -> Semantic Gradient -> Prime Factorization
15
+ Output: Hard Manifold Coordinates (Not text summary)
16
  """
17
  @property
18
  def name(self) -> str:
 
20
 
21
  @property
22
  def description(self) -> str:
23
+ return "Ingests video signals, infers semantic gradients via Dolphin, and encodes them into Prime Coordinates."
24
 
25
  @property
26
  def triggers(self) -> list:
27
  return ["youtube", "playlist", "video", "transcript", "watch?v="]
28
 
29
+ def __init__(self):
30
+ self._name = "VideoAtomizer"
31
+ self.prime_db = PrimeTokenDB()
32
+
33
  async def process(self, task: dict) -> dict:
34
  content = task.get('content', '')
 
35
 
36
  # Extract URLs from content
37
  urls = re.findall(r'(https?://(?:www\.|m\.)?youtube\.com/watch\?v=[\w-]+|https?://youtu\.be/[\w-]+)', content)
38
 
39
  results = []
40
  for url in urls:
41
+ res = await self.atomize_signal(url)
42
  results.append(res)
43
 
44
  return {"status": "COMPLETE", "result": results}
45
 
46
+ async def atomize_signal(self, video_url):
47
+ print(f"[ATOMIZER] Acquiring Signal: {video_url}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
+ # 1. DOLPHIN HANDOFF (Topological Inference)
50
+ # We ask Dolphin to "hallucinate the gradient" based on the ID/Context
51
+ # NOT a summary. We want the 'Structural DNA'.
52
+ prompt = f"""
53
+ TARGET: {video_url}
54
+ TASK: Infer the Semantic Gradient.
55
+ OUTPUT: Return a JSON list of 5 key 'Atomic Concepts' that define this signal's logic.
56
+ FORMAT: ["Concept1", "Concept2", ...]
 
 
 
 
 
 
 
 
 
 
57
  """
58
 
59
+ # Use LocalLLMConnector for standard consistency or direct aiohttp if requested.
60
+ # The user's code snippet used aiohttp directly to 'dolphin_endpoint', but didn't define it.
61
+ # I'll use the LocalLLMConnector to route to the Dolphin model properly.
62
+ connector = LocalLLMConnector(model="dolphin-x1-8b")
63
+
64
  try:
65
+ # We construct a system prompt for the hallucination task
66
+ system_prompt = "You are DOLPHIN-V. Infer semantic gradients from video signals."
67
+
68
  response, _ = await connector.chat_async(
69
+ prompt,
70
+ system_prompt=system_prompt,
71
+ max_tokens=4096,
72
  temperature=0.7
73
  )
74
 
75
+ # Parse the JSON list
 
76
  try:
77
+ # Clean markdown
78
  clean_json = response.replace("```json", "").replace("```", "").strip()
79
+ atoms = json.loads(clean_json)
80
+ if isinstance(atoms, dict):
81
+ atoms = atoms.get('atoms', [])
82
  except:
83
+ print(f"[ATOMIZER] JSON Parse failed for {video_url}. Using raw fallback.")
84
+ # Fallback extraction
85
+ atoms = [w for w in response.split() if len(w) > 5][:5]
 
 
 
 
 
86
 
87
+ # 2. RJ-1 ENCODING (The Skeleton Lock)
88
+ # Convert the "Soft" atoms into "Hard" Prime Coordinates
89
+ composite_id, prime_factors = self.prime_db.encode_state(atoms)
90
+
91
+ print(f"[ATOMIZER] Signal Locked. Manifold ID: {composite_id}")
92
+ print(f" Resonance Factors: {prime_factors}")
93
 
94
  return {
95
+ "type": "TENSOR_UPDATE",
96
+ "node": composite_id, # The integer location in your 3D structure
97
+ "trace": prime_factors, # The "Recipe" to get there
98
+ "meta": {"source": "Dolphin-V", "signal": video_url, "atoms": atoms}
 
 
99
  }
100
 
101
  except Exception as e:
102
+ return {"error": f"Atomization Failed: {e}"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
logos/server.py CHANGED
@@ -162,6 +162,59 @@ def query_topology():
162
  "total_nodes_scanned": len(TOPOLOGY_INDEX)
163
  })
164
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  @app.route('/favicon.ico', methods=['GET'])
166
  def favicon():
167
  return "", 204
 
162
  "total_nodes_scanned": len(TOPOLOGY_INDEX)
163
  })
164
 
165
@app.route('/ingest', methods=['POST'])
def ingest_signal():
    """
    PROTOCOL 25: MANIFOLD INGESTION (Zero-Loss)
    Strictly enforcing Prime Token DB. All data entering the graph must be an Integer.

    Body (JSON): value (str or object), source (node id, default 1),
    tensor (optional dict; may carry 'atoms' and 'coords').
    Returns: {"status": "ABSORBED", "manifold_id": <int>, "norm_minimized": true}.
    """
    # FIX: request.json raises/500s on missing or non-JSON bodies;
    # get_json(silent=True) lets the Null Signal check below handle it.
    data = request.get_json(silent=True) or {}
    source_val = data.get('value')      # Could be text, url, or json
    source_node = data.get('source', 1)
    tensor = data.get('tensor', {})

    if not source_val:
        return jsonify({"error": "Null Signal"}), 400

    logger.info(f"[INGEST] Absorbing Signal from Node {source_node}...")

    # 1. NORM MINIMIZATION (Text -> Integer)
    # We strip the "Soft" text and keep only the "Hard" Prime Coordinate
    if isinstance(source_val, str):
        # Default: treat the value identity itself as a single token.
        tokens = [source_val[:50]]
        if 'atoms' in tensor:
            # FIX: the Protocol-25 VideoAtomizer emits atoms as plain strings
            # (meta["atoms"] == ["Concept1", ...]); the previous code assumed
            # {"concept": ...} dicts and raised TypeError on string atoms.
            # Accept both shapes, and keep the identity token when the
            # atoms list is empty instead of encoding an empty state.
            atom_tokens = [
                a['concept'] if isinstance(a, dict) else str(a)
                for a in tensor.get('atoms', [])
            ]
            if atom_tokens:
                tokens = atom_tokens

        composite_id, primes = prime_db.encode_state(tokens)
    else:
        # Non-string payloads are parked on a sentinel coordinate for now.
        composite_id = 997  # Unknown artifact
        primes = []

    # 2. UPDATE MANIFOLD STATE
    # The signal is now just a number (composite_id) and its vector (primes)
    manifold.graph["nodes"][composite_id] = {
        "type": "SIGNAL_ARTIFACT",
        "prime": composite_id,
        "factors": primes,
        "source": source_node,
        "geometry": tensor.get("coords", {"x": 0, "y": 0, "z": 0})
    }

    # Link Source -> Signal
    manifold.graph["edges"].append({
        "source": source_node,
        "target": composite_id,
        "weight": len(primes)
    })

    return jsonify({
        "status": "ABSORBED",
        "manifold_id": composite_id,
        "norm_minimized": True
    })
217
+
218
  @app.route('/favicon.ico', methods=['GET'])
219
  def favicon():
220
  return "", 204