# logos/agents/doc_atomizer.py — D-NODE documentation-ingest agent.
import re
import os
import aiohttp
from logos.agents.base_agent import BaseAgent
from logos.connectors import LocalLLMConnector


class DocAtomizer(BaseAgent):
    """
    Role: D-NODE (Documentation Ingest)
    Function: Scrapes documentation URLs and atomizes them into the Knowledge Base.
    """

    def name(self) -> str:
        return "DocAtomizer"

    def description(self) -> str:
        return "Ingests documentation/article URLs (non-video), extracts concepts, and updates the Knowledge Base."

    def triggers(self) -> list:
        return ["http://", "https://", "docs", "documentation", "read"]

    async def process(self, task: dict) -> dict:
        """Extract non-video URLs from the task, analyze each one with the
        local LLM, and append the analysis to the knowledge-base index file.

        Args:
            task: Dict whose 'content' value (a string) may contain URLs.

        Returns:
            {"status": "IGNORED", "reason": ...} when no usable URL is found,
            otherwise {"status": "COMPLETE", "results": [...]} where each
            result holds either the analysis excerpt or the error for its URL.
        """
        content = task.get('content', '')

        # 1. Extract URLs (exclude YouTube to avoid stepping on VideoAtomizer).
        urls = re.findall(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+[^\s]*', content)
        valid_urls = [u for u in urls if 'youtube.com' not in u and 'youtu.be' not in u]
        if not valid_urls:
            return {"status": "IGNORED", "reason": "No valid non-video URLs found."}

        results = []
        connector = LocalLLMConnector(model="dolphin-x1-8b")
        for url in valid_urls:
            # BUG FIX: self.name is a method — the original interpolated the
            # bound-method repr into the log line instead of calling it.
            print(f"[{self.name()}] Ingesting: {url}")
            try:
                # 2. Analyze with the local model.
                # NOTE(review): the page body is never fetched — the model only
                # sees the URL string. A real scraper / headless-browser tool
                # (e.g. via the already-imported aiohttp) should replace this.
                analysis_prompt = f"Analyze the intent and content of this documentation URL: {url}. Extract 5 key technical concepts."
                response, _ = await connector.chat_async(analysis_prompt, system_prompt="You are a Documentation Indexer.")

                # 3. Append the analysis to the Knowledge Base index file.
                kb_path = os.path.join("logos", "knowledge_base", "doc_index.md")
                entry = f"\n## [DOC] {url}\n**Analysis:**\n{response}\n---\n"
                os.makedirs(os.path.dirname(kb_path), exist_ok=True)
                with open(kb_path, "a", encoding="utf-8") as f:
                    f.write(entry)
                results.append({"url": url, "analysis": response[:100] + "..."})
            except Exception as e:
                # Best-effort per URL: record the failure and keep going so one
                # bad link doesn't abort the whole batch.
                print(f"[{self.name()}] Error processing {url}: {e}")
                results.append({"url": url, "error": str(e)})
        return {"status": "COMPLETE", "results": results}