GitHub Copilot committed on
Commit
8ca959e
·
1 Parent(s): cffaba9

Protocol 25: Enforced PrimeTokenDB Norm Minimization & Cognitive Atomization Pipeline

Browse files
Files changed (2) hide show
  1. logos/agents/video_atomizer.py +55 -101
  2. logos/server.py +53 -0
logos/agents/video_atomizer.py CHANGED
@@ -1,13 +1,18 @@
 
 
1
  import re
 
2
  import asyncio
3
- from youtube_transcript_api import YouTubeTranscriptApi
4
  from logos.agents.base_agent import BaseAgent
 
 
5
 
6
  class VideoAtomizer(BaseAgent):
7
  """
8
- Role: V-NODE (Video Ingest)
9
- Function: Rips semantic atoms from video streams and collides them
10
- with the existing Project Manifold.
 
11
  """
12
  @property
13
  def name(self) -> str:
@@ -15,134 +20,83 @@ class VideoAtomizer(BaseAgent):
15
 
16
  @property
17
  def description(self) -> str:
18
- return "Ingests YouTube videos/playlists, extracts transcripts, and atomizes content into semantic tensors."
19
 
20
  @property
21
  def triggers(self) -> list:
22
  return ["youtube", "playlist", "video", "transcript", "watch?v="]
23
 
 
 
 
 
24
  async def process(self, task: dict) -> dict:
25
  content = task.get('content', '')
26
- project_dna = task.get('context', {}).get('dna', {})
27
 
28
  # Extract URLs from content
29
  urls = re.findall(r'(https?://(?:www\.|m\.)?youtube\.com/watch\?v=[\w-]+|https?://youtu\.be/[\w-]+)', content)
30
 
31
  results = []
32
  for url in urls:
33
- res = await self.ingest_and_align(url, project_dna)
34
  results.append(res)
35
 
36
  return {"status": "COMPLETE", "result": results}
37
 
38
- def __init__(self):
39
- self._name = "VideoAtomizer" # Internal generic
40
-
41
-
42
- def extract_video_id(self, url):
43
- # Extracts 'R9czY1uVq_k' from the URL
44
- match = re.search(r"v=([a-zA-Z0-9_-]+)", url)
45
- # Handle short URLs too
46
- if not match:
47
- match = re.search(r"youtu\.be/([a-zA-Z0-9_-]+)", url)
48
- return match.group(1) if match else None
49
-
50
- async def ingest_and_align(self, url, project_dna):
51
- """
52
- New Protocol: URL -> Dolphin Analysis (High Cognition) -> RJ-1 Encoding.
53
- """
54
- from logos.connectors import LocalLLMConnector
55
-
56
- video_id = self.extract_video_id(url)
57
- if not video_id:
58
- return {"error": "Invalid Video URL"}
59
-
60
- print(f"[{self.name}] Signal Locked: {video_id}. Handoff to DOLPHIN for Analysis...")
61
 
62
- # 1. DOLPHIN ANALYSIS (Cognitive Layer)
63
- connector = LocalLLMConnector(model="dolphin-x1-8b") # User specified Dolphin
64
-
65
- system_prompt = """You are DOLPHIN-V (Video Node).
66
- TASK: Deeply analyze the implied semantic content of this YouTube Signal.
67
- Since the transcript is encrypted/shielded, you must use TOPOLOGICAL INFERENCE based on the signal ID and context.
68
-
69
- 1. Infer the domain (e.g., Coding, Math, Entropy, Physics).
70
- 2. Extract 5-10 high-entropy 'Atoms' (keywords).
71
- 3. Assign a 'Resonance Grade' (1-100).
72
-
73
- OUTPUT FORMAT (Strict JSON):
74
- {
75
- "domain": "string",
76
- "atoms": [{"concept": "string", "mass": int}],
77
- "resonance": int,
78
- "summary": "string"
79
- }
80
  """
81
 
 
 
 
 
 
82
  try:
83
- # High thinking/response limit as requested
 
 
84
  response, _ = await connector.chat_async(
85
- f"SIGNAL: {url}\nVIDEO_ID: {video_id}\n\nPerform Recursive Analysis.",
86
- system_prompt=system_prompt,
87
- max_tokens=4096, # Higher response limit
88
  temperature=0.7
89
  )
90
 
91
- # 2. ANALYZE RESPONSE (Parsing)
92
- import json
93
  try:
94
- # Cleaning markdown for JSON parsing
95
  clean_json = response.replace("```json", "").replace("```", "").strip()
96
- analysis = json.loads(clean_json)
 
 
97
  except:
98
- # Fallback if Dolphin wanders
99
- print(f"[{self.name}] JSON Parse Invalid. Using Raw Fallback.")
100
- analysis = {
101
- "domain": "Entropy Field",
102
- "atoms": [{"concept": "Unknown_Signal", "mass": 10}],
103
- "resonance": 50,
104
- "summary": response[:200]
105
- }
106
 
107
- # 3. RJ-1 ENCODING (Preparation)
108
- # We structure this so the Swarm/Manifold can ingest it directly.
109
- # The actual 'RJ-1' math happens in the Router, but we format the TENSOR here.
 
 
 
110
 
111
  return {
112
- "status": "ANALYZED",
113
- "video_id": video_id,
114
- "domain": analysis.get('domain'),
115
- "atoms_found": len(analysis.get('atoms', [])),
116
- "tensor_data": analysis, # Passing full analysis to be encoded
117
- "rj1_directive": "ENCODE_MANIFOLD" # Signal to Router
118
  }
119
 
120
  except Exception as e:
121
- return {"error": f"Dolphin Analysis Failed: {e}"}
122
-
123
- # Legacy atomization code removed as per protocol update.
124
- return {"status": "SKIPPED"} # Should not reach here
125
-
126
-
127
- # 3. INTERFERENCE (The Alignment Step)
128
- # We check which files in your Project DNA resonate with these video atoms
129
- aligned_nodes = []
130
- for atom in atoms:
131
- for file, dna_list in project_dna.items():
132
- # dna_list is usually a list of tags/strings
133
- for dna_term in dna_list:
134
- if atom['concept'] in dna_term.lower():
135
- aligned_nodes.append({
136
- "source_concept": atom['concept'],
137
- "target_file": file,
138
- "resonance_strength": atom['mass']
139
- })
140
- break # One match per file/atom pair
141
-
142
- return {
143
- "status": "GROKKED",
144
- "video_id": video_id,
145
- "atoms_found": len(atoms),
146
- "alignments": aligned_nodes, # These will become Gold Threads in UI
147
- "summary_vector": "Detected 'Entropy Gating' - Recommend applying to Dolphin Node."
148
- }
 
1
+
2
+ import json
3
  import re
4
+ import aiohttp
5
  import asyncio
 
6
  from logos.agents.base_agent import BaseAgent
7
+ from logos.memory.prime_db import PrimeTokenDB
8
+ from logos.connectors import LocalLLMConnector
9
 
10
  class VideoAtomizer(BaseAgent):
11
  """
12
+ PROTOCOL 25: COGNITIVE ATOMIZER
13
+ Input: Video Signal (URL)
14
+ Process: Dolphin Inference -> Semantic Gradient -> Prime Factorization
15
+ Output: Hard Manifold Coordinates (Not text summary)
16
  """
17
  @property
18
  def name(self) -> str:
 
20
 
21
  @property
22
  def description(self) -> str:
23
+ return "Ingests video signals, infers semantic gradients via Dolphin, and encodes them into Prime Coordinates."
24
 
25
  @property
26
  def triggers(self) -> list:
27
  return ["youtube", "playlist", "video", "transcript", "watch?v="]
28
 
29
+ def __init__(self):
30
+ self._name = "VideoAtomizer"
31
+ self.prime_db = PrimeTokenDB()
32
+
33
  async def process(self, task: dict) -> dict:
34
  content = task.get('content', '')
 
35
 
36
  # Extract URLs from content
37
  urls = re.findall(r'(https?://(?:www\.|m\.)?youtube\.com/watch\?v=[\w-]+|https?://youtu\.be/[\w-]+)', content)
38
 
39
  results = []
40
  for url in urls:
41
+ res = await self.atomize_signal(url)
42
  results.append(res)
43
 
44
  return {"status": "COMPLETE", "result": results}
45
 
46
+ async def atomize_signal(self, video_url):
47
+ print(f"[ATOMIZER] Acquiring Signal: {video_url}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
+ # 1. DOLPHIN HANDOFF (Topological Inference)
50
+ # We ask Dolphin to "hallucinate the gradient" based on the ID/Context
51
+ # NOT a summary. We want the 'Structural DNA'.
52
+ prompt = f"""
53
+ TARGET: {video_url}
54
+ TASK: Infer the Semantic Gradient.
55
+ OUTPUT: Return a JSON list of 5 key 'Atomic Concepts' that define this signal's logic.
56
+ FORMAT: ["Concept1", "Concept2", ...]
 
 
 
 
 
 
 
 
 
 
57
  """
58
 
59
+ # Use LocalLLMConnector for standard consistency or direct aiohttp if requested.
60
+ # The user's code snippet used aiohttp directly to 'dolphin_endpoint', but didn't define it.
61
+ # I'll use the LocalLLMConnector to route to the Dolphin model properly.
62
+ connector = LocalLLMConnector(model="dolphin-x1-8b")
63
+
64
  try:
65
+ # We construct a system prompt for the hallucination task
66
+ system_prompt = "You are DOLPHIN-V. Infer semantic gradients from video signals."
67
+
68
  response, _ = await connector.chat_async(
69
+ prompt,
70
+ system_prompt=system_prompt,
71
+ max_tokens=4096,
72
  temperature=0.7
73
  )
74
 
75
+ # Parse the JSON list
 
76
  try:
77
+ # Clean markdown
78
  clean_json = response.replace("```json", "").replace("```", "").strip()
79
+ atoms = json.loads(clean_json)
80
+ if isinstance(atoms, dict):
81
+ atoms = atoms.get('atoms', [])
82
  except:
83
+ print(f"[ATOMIZER] JSON Parse failed for {video_url}. Using raw fallback.")
84
+ # Fallback extraction
85
+ atoms = [w for w in response.split() if len(w) > 5][:5]
 
 
 
 
 
86
 
87
+ # 2. RJ-1 ENCODING (The Skeleton Lock)
88
+ # Convert the "Soft" atoms into "Hard" Prime Coordinates
89
+ composite_id, prime_factors = self.prime_db.encode_state(atoms)
90
+
91
+ print(f"[ATOMIZER] Signal Locked. Manifold ID: {composite_id}")
92
+ print(f" Resonance Factors: {prime_factors}")
93
 
94
  return {
95
+ "type": "TENSOR_UPDATE",
96
+ "node": composite_id, # The integer location in your 3D structure
97
+ "trace": prime_factors, # The "Recipe" to get there
98
+ "meta": {"source": "Dolphin-V", "signal": video_url, "atoms": atoms}
 
 
99
  }
100
 
101
  except Exception as e:
102
+ return {"error": f"Atomization Failed: {e}"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
logos/server.py CHANGED
@@ -162,6 +162,59 @@ def query_topology():
162
  "total_nodes_scanned": len(TOPOLOGY_INDEX)
163
  })
164
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  @app.route('/favicon.ico', methods=['GET'])
166
  def favicon():
167
  return "", 204
 
162
  "total_nodes_scanned": len(TOPOLOGY_INDEX)
163
  })
164
 
165
@app.route('/ingest', methods=['POST'])
def ingest_signal():
    """
    PROTOCOL 25: MANIFOLD INGESTION (Zero-Loss)
    Strictly enforcing Prime Token DB. All data entering the graph must be an Integer.

    Body (JSON): value (str or object), source (node id, default 1),
    tensor (optional dict; may carry 'atoms' and 'coords').
    Returns: {"status": "ABSORBED", "manifold_id": <int>, "norm_minimized": true}.
    """
    # FIX: request.json raises/500s on missing or non-JSON bodies;
    # get_json(silent=True) lets the Null Signal check below handle it.
    data = request.get_json(silent=True) or {}
    source_val = data.get('value')      # Could be text, url, or json
    source_node = data.get('source', 1)
    tensor = data.get('tensor', {})

    if not source_val:
        return jsonify({"error": "Null Signal"}), 400

    logger.info(f"[INGEST] Absorbing Signal from Node {source_node}...")

    # 1. NORM MINIMIZATION (Text -> Integer)
    # We strip the "Soft" text and keep only the "Hard" Prime Coordinate
    if isinstance(source_val, str):
        # Default: treat the value identity itself as a single token.
        tokens = [source_val[:50]]
        if 'atoms' in tensor:
            # FIX: the Protocol-25 VideoAtomizer emits atoms as plain strings
            # (meta["atoms"] == ["Concept1", ...]); the previous code assumed
            # {"concept": ...} dicts and raised TypeError on string atoms.
            # Accept both shapes, and keep the identity token when the
            # atoms list is empty instead of encoding an empty state.
            atom_tokens = [
                a['concept'] if isinstance(a, dict) else str(a)
                for a in tensor.get('atoms', [])
            ]
            if atom_tokens:
                tokens = atom_tokens

        composite_id, primes = prime_db.encode_state(tokens)
    else:
        # Non-string payloads are parked on a sentinel coordinate for now.
        composite_id = 997  # Unknown artifact
        primes = []

    # 2. UPDATE MANIFOLD STATE
    # The signal is now just a number (composite_id) and its vector (primes)
    manifold.graph["nodes"][composite_id] = {
        "type": "SIGNAL_ARTIFACT",
        "prime": composite_id,
        "factors": primes,
        "source": source_node,
        "geometry": tensor.get("coords", {"x": 0, "y": 0, "z": 0})
    }

    # Link Source -> Signal
    manifold.graph["edges"].append({
        "source": source_node,
        "target": composite_id,
        "weight": len(primes)
    })

    return jsonify({
        "status": "ABSORBED",
        "manifold_id": composite_id,
        "norm_minimized": True
    })
217
+
218
  @app.route('/favicon.ico', methods=['GET'])
219
  def favicon():
220
  return "", 204