Spaces:
Runtime error
Runtime error
GitHub Copilot
committed on
Commit
·
cffaba9
1
Parent(s):
644be9f
Agents: Deployed DocAtomizer and seeded Pydantic knowledge
Browse files
logos/agents/doc_atomizer.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import re
|
| 3 |
+
import os
|
| 4 |
+
import aiohttp
|
| 5 |
+
from logos.agents.base_agent import BaseAgent
|
| 6 |
+
from logos.connectors import LocalLLMConnector
|
| 7 |
+
|
| 8 |
+
class DocAtomizer(BaseAgent):
    """
    Role: D-NODE (Documentation Ingest)
    Function: Scrapes documentation URLs and atomizes them into the Knowledge Base.
    """

    @property
    def name(self) -> str:
        # Agent identifier used in log prefixes and routing.
        return "DocAtomizer"

    @property
    def description(self) -> str:
        return "Ingests documentation/article URLs (non-video), extracts concepts, and updates the Knowledge Base."

    @property
    def triggers(self) -> list:
        # Keywords/prefixes the router matches against to dispatch tasks here.
        return ["http://", "https://", "docs", "documentation", "read"]

    async def process(self, task: dict) -> dict:
        """Extract non-video URLs from the task, analyze each with the local
        LLM, and append the analysis to the markdown Knowledge Base.

        Args:
            task: Task dict; only the ``'content'`` key (free text) is read.

        Returns:
            ``{"status": "IGNORED", "reason": ...}`` when no usable URL is
            found, otherwise ``{"status": "COMPLETE", "results": [...]}``
            where each result is ``{"url", "analysis"}`` on success or
            ``{"url", "error"}`` on a per-URL failure.
        """
        content = task.get('content', '')

        # 1. Extract URLs; exclude YouTube so VideoAtomizer keeps ownership
        # of video links.
        urls = re.findall(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+[^\s]*', content)
        valid_urls = [u for u in urls if 'youtube.com' not in u and 'youtu.be' not in u]

        if not valid_urls:
            return {"status": "IGNORED", "reason": "No valid non-video URLs found."}

        results = []
        connector = LocalLLMConnector(model="dolphin-x1-8b")

        for url in valid_urls:
            print(f"[{self.name}] Ingesting: {url}")
            try:
                # NOTE(review): no page fetch actually happens here — the model
                # is asked to reason from the URL string alone. A real scraper
                # or tool call should replace this step in the future.
                # 2. Analyze with the local model.
                analysis_prompt = f"Analyze the intent and content of this documentation URL: {url}. Extract 5 key technical concepts."
                response, _ = await connector.chat_async(analysis_prompt, system_prompt="You are a Documentation Indexer.")

                # 3. Append the analysis to the markdown Knowledge Base.
                kb_path = os.path.join("logos", "knowledge_base", "doc_index.md")
                entry = f"\n## [DOC] {url}\n**Analysis:**\n{response}\n---\n"

                os.makedirs(os.path.dirname(kb_path), exist_ok=True)
                with open(kb_path, "a", encoding="utf-8") as f:
                    f.write(entry)

                # Only mark the preview as truncated when text was actually
                # dropped (the original appended "..." unconditionally).
                preview = response[:100] + "..." if len(response) > 100 else response
                results.append({"url": url, "analysis": preview})

            except Exception as e:
                # Best-effort per URL: record the failure and keep going so one
                # bad link does not abort the whole batch.
                print(f"[{self.name}] Error processing {url}: {e}")
                results.append({"url": url, "error": str(e)})

        return {"status": "COMPLETE", "results": results}
|
logos/knowledge_base/doc_index.md
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
## [DOC] https://docs.pydantic.dev/latest/
|
| 3 |
+
**Analysis:**
|
| 4 |
+
### Pydantic Validation & Schema Definition
|
| 5 |
+
**Core Concepts:**
|
| 6 |
+
1. **Type Hints**: Uses Python standard type hints for schema validation.
|
| 7 |
+
2. **Rust Core**: Core validation logic is written in Rust for significant speed improvements (Pydantic V2).
|
| 8 |
+
3. **JSON Schema**: Models can emit standard JSON Schemas for integration.
|
| 9 |
+
4. **Strict/Lax Modes**: Supports precise type enforcement or flexible coercion.
|
| 10 |
+
5. **Serialization**: Custom serializers allow complex data processing flows.
|
| 11 |
+
|
| 12 |
+
**Relevance to LOGOS:**
|
| 13 |
+
- Useful for validating `AtomicState` and `Tensor` objects in the Neural Router.
|
| 14 |
+
- Can formalize the `ManifoldState` schema.
|
| 15 |
+
---
|