EDEN OS v1.0 — Phase One complete | OWN THE SCIENCE
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .dockerignore +9 -0
- .gitignore +11 -0
- CLAUDE.md +559 -0
- Dockerfile +38 -0
- app.py +163 -0
- config/default.yaml +62 -0
- config/eden_protocol.yaml +25 -0
- config/hardware_profiles/cpu_edge.yaml +21 -0
- config/hardware_profiles/h100_cinematic.yaml +21 -0
- config/hardware_profiles/l4_cloud.yaml +19 -0
- config/hardware_profiles/rtx3090_standard.yaml +20 -0
- config/hardware_profiles/rtx4090_production.yaml +19 -0
- docker-compose.yml +22 -0
- eden_os/__init__.py +0 -0
- eden_os/animator/__init__.py +4 -0
- eden_os/animator/animator_engine.py +190 -0
- eden_os/animator/audio_to_keypoints.py +146 -0
- eden_os/animator/eden_temporal_anchor.py +168 -0
- eden_os/animator/idle_generator.py +226 -0
- eden_os/animator/liveportrait_driver.py +278 -0
- eden_os/animator/state_machine.py +188 -0
- eden_os/brain/__init__.py +11 -0
- eden_os/brain/brain_engine.py +188 -0
- eden_os/brain/memory_manager.py +154 -0
- eden_os/brain/persona_manager.py +165 -0
- eden_os/brain/reasoning_engine.py +118 -0
- eden_os/brain/streaming_bridge.py +163 -0
- eden_os/brain/template_loader.py +126 -0
- eden_os/conductor/__init__.py +11 -0
- eden_os/conductor/error_recovery.py +151 -0
- eden_os/conductor/latency_enforcer.py +132 -0
- eden_os/conductor/metrics_collector.py +135 -0
- eden_os/conductor/orchestrator.py +380 -0
- eden_os/conductor/session_manager.py +120 -0
- eden_os/gateway/__init__.py +11 -0
- eden_os/gateway/api_server.py +521 -0
- eden_os/gateway/audio_capture.py +154 -0
- eden_os/gateway/video_encoder.py +141 -0
- eden_os/gateway/webrtc_handler.py +132 -0
- eden_os/gateway/websocket_handler.py +308 -0
- eden_os/genesis/__init__.py +116 -0
- eden_os/genesis/eden_protocol_validator.py +177 -0
- eden_os/genesis/latent_encoder.py +129 -0
- eden_os/genesis/portrait_engine.py +233 -0
- eden_os/genesis/preload_cache.py +172 -0
- eden_os/scholar/__init__.py +134 -0
- eden_os/scholar/audiobook_ingestor.py +299 -0
- eden_os/scholar/knowledge_graph.py +305 -0
- eden_os/scholar/media_analyzer.py +201 -0
- eden_os/scholar/rag_retriever.py +237 -0
.dockerignore
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.git
|
| 2 |
+
.venv
|
| 3 |
+
__pycache__
|
| 4 |
+
*.pyc
|
| 5 |
+
.env
|
| 6 |
+
*.egg-info
|
| 7 |
+
.mypy_cache
|
| 8 |
+
.pytest_cache
|
| 9 |
+
node_modules
|
.gitignore
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.venv/
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.pyc
|
| 4 |
+
*.egg-info/
|
| 5 |
+
.env
|
| 6 |
+
models_cache/
|
| 7 |
+
data/
|
| 8 |
+
*.pth
|
| 9 |
+
*.onnx
|
| 10 |
+
*.safetensors
|
| 11 |
+
*.bin
|
CLAUDE.md
ADDED
|
@@ -0,0 +1,559 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# EDEN OS — CLAUDE.md
|
| 2 |
+
# Master Orchestration File for Building 4D Bi-Directional Conversational Avatars
|
| 3 |
+
# Version: 1.0 | Phase: ONE (OS Pipeline) | Codename: OWN THE SCIENCE
|
| 4 |
+
# Generated: 2026-03-31 by Amanda (Avatar Pipeline Architect)
|
| 5 |
+
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
## MISSION STATEMENT
|
| 9 |
+
|
| 10 |
+
You are building **EDEN OS** — an operating system pipeline that converts any 2D portrait image into a photorealistic 4D conversational avatar capable of real-time bi-directional dialogue. The avatar must be so realistic that it is indistinguishable from a human video call. The system must be **conversation-ready upon load** — no warm-up, no buffering, no uncanny valley.
|
| 11 |
+
|
| 12 |
+
**"Own The Science"** means: we do not wrap APIs. We engineer our own inference pipeline using open-weight models from Hugging Face, orchestrated through a custom backend that delivers sub-200ms latency for the reasoning layer and sub-5-second total pipeline execution for the full video response cycle.
|
| 13 |
+
|
| 14 |
+
---
|
| 15 |
+
|
| 16 |
+
## PHASE ONE SCOPE: EDEN OS AS A HEADLESS OPERATING SYSTEM
|
| 17 |
+
|
| 18 |
+
EDEN OS is NOT an app. It is an **operating system** — a headless backend engine that exposes a universal API. Any frontend (React, Gradio, mobile app, kiosk, VR headset) plugs into it. When the build is complete, the OS deploys to a **HuggingFace Space** at `AIBRUH/eden-os` and is immediately accessible via a live URL.
|
| 19 |
+
|
| 20 |
+
### Deployment Target
|
| 21 |
+
- **Platform**: HuggingFace Spaces (Docker SDK with GPU)
|
| 22 |
+
- **Space ID**: `AIBRUH/eden-os`
|
| 23 |
+
- **Live URL**: `https://huggingface.co/spaces/AIBRUH/eden-os`
|
| 24 |
+
- **SDK**: Docker (not Gradio — we need full control over the server)
|
| 25 |
+
- **Hardware**: T4 GPU (free tier to start, upgrade to A10G/A100 for production)
|
| 26 |
+
- **Secrets**: `ANTHROPIC_API_KEY`, `HF_TOKEN` stored as HF Space secrets
|
| 27 |
+
|
| 28 |
+
### What the URL delivers
|
| 29 |
+
When you click the link, you see the **EDEN Studio admin panel** — EVE is displayed, idle-animating (blinking, breathing), ready to converse. The full admin panel UI loads: behavioral sliders, knowledge injection modal, pipeline controls. Click "Initiate Conversation" and EVE is live.
|
| 30 |
+
|
| 31 |
+
### Headless OS Architecture (Scalability from Ground Zero)
|
| 32 |
+
```
|
| 33 |
+
┌─────────────────────────────────────────────────────────┐
|
| 34 |
+
│ EDEN OS (Headless Engine) │
|
| 35 |
+
│ │
|
| 36 |
+
│ ┌─────────────────────────────────────────────────┐ │
|
| 37 |
+
│ │ EDEN OS API LAYER │ │
|
| 38 |
+
│ │ POST /api/v1/sessions (create session) │ │
|
| 39 |
+
│ │ WS /api/v1/sessions/{id}/stream (bi-dir AV) │ │
|
| 40 |
+
│ │ POST /api/v1/knowledge/ingest (feed media) │ │
|
| 41 |
+
│ │ PUT /api/v1/settings/{id} (sliders/config) │ │
|
| 42 |
+
│ │ GET /api/v1/health (status) │ │
|
| 43 |
+
│ └──────────────┬──────────────────────────────────┘ │
|
| 44 |
+
│ │ │
|
| 45 |
+
│ ┌──────────────▼──────────────────────────────────┐ │
|
| 46 |
+
│ │ CONDUCTOR (Orchestrator) │ │
|
| 47 |
+
│ │ Connects all 7 engines, enforces latency, │ │
|
| 48 |
+
│ │ manages sessions, routes data between engines │ │
|
| 49 |
+
│ └──────────────┬──────────────────────────────────┘ │
|
| 50 |
+
│ │ │
|
| 51 |
+
│ ┌──────┐ ┌──────┐ ┌──────┐ ┌──────┐ ┌──────┐ ┌──────┐│
|
| 52 |
+
│ │GENESIS│ │VOICE │ │ANIMTR│ │BRAIN │ │SCHOLR│ │GATWAY││
|
| 53 |
+
│ │Agent 1│ │Agent2│ │Agent3│ │Agent4│ │Agent7│ │Agent6││
|
| 54 |
+
│ └──────┘ └──────┘ └──────┘ └──────┘ └──────┘ └──────┘│
|
| 55 |
+
└─────────────────────────────────────────────────────────┘
|
| 56 |
+
│ ▲
|
| 57 |
+
▼ │
|
| 58 |
+
┌─────────────────────────────────────────────────────────┐
|
| 59 |
+
│ ANY FRONTEND (Pluggable) │
|
| 60 |
+
│ │
|
| 61 |
+
│ Option A: EDEN Studio (built-in, served at / ) │
|
| 62 |
+
│ Option B: React/Next.js SaaS app (Phase Two) │
|
| 63 |
+
│ Option C: Gradio interface (rapid prototyping) │
|
| 64 |
+
│ Option D: Mobile SDK (iOS/Android) │
|
| 65 |
+
│ Option E: Third-party integration via API │
|
| 66 |
+
│ Option F: Embed widget (like Naoma's website embed) │
|
| 67 |
+
└─────────────────────────────────────────────────────────┘
|
| 68 |
+
```
|
| 69 |
+
|
| 70 |
+
### The Core Principle
|
| 71 |
+
Every feature in EDEN OS is accessible through the API. The built-in EDEN Studio frontend is just one client. A developer should be able to `curl` the API and get a talking avatar response. This is what makes it an OS, not an app.
|
| 72 |
+
|
| 73 |
+
### API Contract (Universal Interface)
|
| 74 |
+
```
|
| 75 |
+
# Create a session — returns session_id + WebSocket URL
|
| 76 |
+
POST /api/v1/sessions
|
| 77 |
+
Body: { portrait_image: base64, template: "medical_office" }
|
| 78 |
+
Returns: { session_id, ws_url, status: "ready" }
|
| 79 |
+
|
| 80 |
+
# Stream bi-directional conversation (WebSocket)
|
| 81 |
+
WS /api/v1/sessions/{id}/stream
|
| 82 |
+
Send: { type: "audio", data: base64_pcm } ← user speaks
|
| 83 |
+
Send: { type: "text", content: "hello" } ← or types
|
| 84 |
+
Send: { type: "interrupt" } ← user interrupts
|
| 85 |
+
Receive: { type: "video_frame", data: base64 } ← animated avatar frame
|
| 86 |
+
Receive: { type: "audio", data: base64_wav } ← avatar voice
|
| 87 |
+
Receive: { type: "transcript", text: "..." } ← what avatar said
|
| 88 |
+
Receive: { type: "state", value: "speaking" } ← current state
|
| 89 |
+
|
| 90 |
+
# Inject knowledge (YouTube, audiobook, URL)
|
| 91 |
+
POST /api/v1/knowledge/ingest
|
| 92 |
+
Body: { type: "youtube", url: "https://..." }
|
| 93 |
+
Body: { type: "audiobook", file: base64_mp3 }
|
| 94 |
+
Body: { type: "url", url: "https://arxiv.org/..." }
|
| 95 |
+
Returns: { job_id, status: "processing", chunks_estimated: 127 }
|
| 96 |
+
|
| 97 |
+
# Update behavioral settings in real-time
|
| 98 |
+
PUT /api/v1/sessions/{id}/settings
|
| 99 |
+
Body: { expressiveness: 0.8, eye_contact: 1.0, voice_tone: 0.85 }
|
| 100 |
+
Returns: { applied: true }
|
| 101 |
+
|
| 102 |
+
# Swap models mid-session
|
| 103 |
+
PUT /api/v1/sessions/{id}/pipeline
|
| 104 |
+
Body: { tts_engine: "styletts2", animation_engine: "hunyuan" }
|
| 105 |
+
Returns: { swapped: true, reload_time_ms: 2400 }
|
| 106 |
+
```
|
| 107 |
+
|
| 108 |
+
### HuggingFace Space Structure (Dockerfile-based)
|
| 109 |
+
```
|
| 110 |
+
AIBRUH/eden-os/
|
| 111 |
+
├── Dockerfile # Multi-stage build, CUDA base image
|
| 112 |
+
├── app.py # Entry point — boots FastAPI + all engines
|
| 113 |
+
├── requirements.txt
|
| 114 |
+
├── static/
|
| 115 |
+
│ └── index.html # EDEN Studio admin panel (built-in frontend)
|
| 116 |
+
├── eden_os/
|
| 117 |
+
│ ├── __init__.py
|
| 118 |
+
│ ├── genesis/ # Agent 1
|
| 119 |
+
│ ├── voice/ # Agent 2
|
| 120 |
+
│ ├── animator/ # Agent 3
|
| 121 |
+
│ ├── brain/ # Agent 4
|
| 122 |
+
│ ├── conductor/ # Agent 5
|
| 123 |
+
│ ├── gateway/ # Agent 6
|
| 124 |
+
│ ├── scholar/ # Agent 7
|
| 125 |
+
│ └── shared/ # Shared types, interfaces, config
|
| 126 |
+
├── templates/ # Agent persona YAMLs
|
| 127 |
+
├── models_cache/ # Downloaded HF model weights (persistent volume)
|
| 128 |
+
└── README.md # HF Space card
|
| 129 |
+
```
|
| 130 |
+
|
| 131 |
+
### Existing AIBRUH Spaces Integration
|
| 132 |
+
Your existing HuggingFace Spaces become specialized microservices that EDEN OS can call:
|
| 133 |
+
- `AIBRUH/eve-voice-engine` → Voice engine can delegate to this for advanced TTS
|
| 134 |
+
- `AIBRUH/eden-realism-engine` → Genesis can call this for Eden Protocol validation
|
| 135 |
+
- `AIBRUH/eden-diffusion-studio` → Genesis can call this for FLUX portrait generation
|
| 136 |
+
- `AIBRUH/eden-comfyui-pipeline` → Animator can call this for advanced ComfyUI workflows
|
| 137 |
+
- `AIBRUH/eden-video-studio` → Conductor can delegate cinematic renders here
|
| 138 |
+
|
| 139 |
+
The core `AIBRUH/eden-os` Space is the brain that orchestrates everything. The other Spaces become optional accelerators.
|
| 140 |
+
|
| 141 |
+
### Phase One deliverable:
|
| 142 |
+
1. `AIBRUH/eden-os` Space is live on HuggingFace
|
| 143 |
+
2. Clicking the URL opens EDEN Studio with EVE ready to converse
|
| 144 |
+
3. The API is accessible at `https://AIBRUH-eden-os.hf.space/api/v1/`
|
| 145 |
+
4. Any developer can integrate EDEN OS into their own frontend via the API
|
| 146 |
+
|
| 147 |
+
---
|
| 148 |
+
|
| 149 |
+
## ARCHITECTURAL PHILOSOPHY: THE EDEN PROTOCOL
|
| 150 |
+
|
| 151 |
+
### The 0.3 Deviation Rule
|
| 152 |
+
Every generated frame must pass the **Eden Protocol Validator**: skin texture deviation from the reference portrait must remain below 0.3 standard deviations. This eliminates the "plastic skin" and "waxy sheen" artifacts that plague competing systems, especially on melanin-rich skin tones. This is our signature. This is how people know EDEN made it.
|
| 153 |
+
|
| 154 |
+
### The Three States of Presence
|
| 155 |
+
A truly bi-directional avatar must handle three simultaneous states:
|
| 156 |
+
|
| 157 |
+
1. **LISTENING** — The avatar maintains active listening behaviors (micro-blinks, subtle nodding, gaze tracking) while the user speaks. ASR processes audio in real-time. The avatar is NOT frozen.
|
| 158 |
+
2. **THINKING** — The LLM generates a response. First tokens trigger TTS immediately (streaming response). The avatar transitions from listening to a "processing" micro-expression (slight brow raise, inhale).
|
| 159 |
+
3. **SPEAKING** — The 4D diffusion/animation model generates video frames synchronized with the TTS audio stream. Frame-by-frame autoregressive generation with KV-cache for temporal consistency.
|
| 160 |
+
|
| 161 |
+
### The KV-Recache Interruption Protocol
|
| 162 |
+
When a user interrupts mid-response, the system must:
|
| 163 |
+
- Immediately halt TTS generation
|
| 164 |
+
- Refresh the KV-cache for future frames while preserving temporal anchors of the current face position (no glitch, no jump-cut)
|
| 165 |
+
- Transition the avatar back to LISTENING state within 100ms
|
| 166 |
+
- Begin processing the new user input
|
| 167 |
+
|
| 168 |
+
This is adapted from the LONGLIVE framework (arXiv:2509.22622) and is what separates EDEN from every "talking head" on the market.
|
| 169 |
+
|
| 170 |
+
---
|
| 171 |
+
|
| 172 |
+
## EDEN STUDIO ADMIN PANEL SPECIFICATION
|
| 173 |
+
|
| 174 |
+
The admin panel is the operator's control surface for EDEN OS. It has two views derived from the prototype UI: the **Main Control Surface** and the **Knowledge Injection Modal**. Every element maps to a real backend function.
|
| 175 |
+
|
| 176 |
+
### Main Control Surface (Admin Panel 1)
|
| 177 |
+
|
| 178 |
+
**Layout**: Three-column design on black (#080503) background. Left column: Settings + Backend. Center column: Pipeline controls + Connectivity. Right column: EVE avatar + context editor.
|
| 179 |
+
|
| 180 |
+
**Left Column — Behavioral Sliders**:
|
| 181 |
+
These sliders control LivePortrait's retargeting parameters and the Voice/Brain engines in real-time:
|
| 182 |
+
|
| 183 |
+
| Slider | Default | Maps To | Engine |
|
| 184 |
+
|--------|---------|---------|--------|
|
| 185 |
+
| **Consistency** | ~70% | Eden Protocol threshold. 100% = strict 0.3 deviation. Lower = relaxed matching | Genesis → `eden_protocol_validator.py` |
|
| 186 |
+
| **Latency** | 100% | Pipeline priority. 100% = max speed (Schnell, Kokoro, skip upscale). 0% = max quality (FLUX Pro, StyleTTS2, full upscale) | Conductor → `latency_enforcer.py` |
|
| 187 |
+
| **Expressiveness** | ~60% | LivePortrait retargeting amplitude. High = wide mouth, big brow raises. Low = subtle, reserved | Animator → `liveportrait_driver.py` expression_scale |
|
| 188 |
+
| **Voice Tone** | 85% | TTS pitch and warmth. High = warmer, richer. Low = neutral, clinical | Voice → `tts_engine.py` tone_warmth |
|
| 189 |
+
| **Eye Contact** | ~50% | Gaze lock to camera. 100% = locked on user. 0% = natural wandering gaze | Animator → `liveportrait_driver.py` gaze_lock |
|
| 190 |
+
| **Flirtation** | 15% | Composite: smile intensity + brow play + head tilt frequency + voice breathiness | Animator + Voice combined |
|
| 191 |
+
|
| 192 |
+
**Left Column — Buttons**:
|
| 193 |
+
- **Backend Settings** (gold) → GPU profile selector, model swap, API keys, Redis/Celery status, memory dashboard
|
| 194 |
+
- **Design** row (8 waveform icons) → Voice profile presets. Each icon = different voice character (warm female, authoritative male, calm soothing, etc.). Click to swap TTS voice instantly
|
| 195 |
+
|
| 196 |
+
**Center Column — Pipeline Controls**:
|
| 197 |
+
- **Model to Model** → Live-swap any model mid-session without restart. Switch CosyVoice2 → StyleTTS2 or LivePortrait Path A → HunyuanVideo-Avatar Path B
|
| 198 |
+
- **New Pipeline** → Pipeline builder (React Flow node editor). Drag-and-drop model nodes to create custom inference chains. Five archetypes:
|
| 199 |
+
1. **Low-Latency Streamer**: WebRTC → Whisper-Small → BitNet-3B → Kokoro → LivePortrait
|
| 200 |
+
2. **Emotive Actor**: Sentiment-Analyzer → Emotion-LoRA-Router → StyleTTS2 → HunyuanAvatar
|
| 201 |
+
3. **Knowledge Expert RAG**: Vector-DB → Context-Injection → Claude Sonnet → CosyVoice2 → LivePortrait
|
| 202 |
+
4. **Zero-Shot Creator**: User-Image-Upload → IP-Adapter → FLUX → LivePortrait
|
| 203 |
+
5. **Director's Cut**: Human-in-the-Loop → Manual-Pose-Control → LivePortrait
|
| 204 |
+
- **Connectivity** → Real-time status: WebRTC CONNECTED/DISCONNECTED, WebSocket fallback, GPU util, active models
|
| 205 |
+
|
| 206 |
+
**Right Column — Avatar + Context**:
|
| 207 |
+
- **EVE portrait** — live animated video feed during conversation
|
| 208 |
+
- **EDEN** pill button (top-right) → Avatar identity selector. Swap between different avatar models
|
| 209 |
+
- **Custom Instructions & Context Ref** overlay → System prompt editor overlaying EVE. Markdown-supported persona instructions. Shows active context document
|
| 210 |
+
- **Apply to EVE's Memory** → Commits instructions to Brain's persona manager + persists key facts to long-term memory
|
| 211 |
+
- **Compliance Matter** badge → Visual indicator that persona is compliance-reviewed (medical/financial)
|
| 212 |
+
|
| 213 |
+
**Bottom Row — Action Buttons**:
|
| 214 |
+
- **Build Voice Agent** → Voice agent creation wizard: template → persona → voice clone → appearance → deploy
|
| 215 |
+
- **Hair & Wardrobe** → Appearance editor. Changes hair, clothing, accessories, background via FLUX inpainting with IP-Adapter identity lock (face preserved, outfit changed)
|
| 216 |
+
- **THE VOICE** (large gold bar) → Full voice config: cloning upload, emotion sliders, speed, language, preview
|
| 217 |
+
- **Initiate Conversation** (large gold bar) → Primary CTA. Boots pipeline, starts idle loop, activates ASR, enters conversation mode
|
| 218 |
+
|
| 219 |
+
### Knowledge Injection Modal (Admin Panel 2)
|
| 220 |
+
|
| 221 |
+
This modal is the **intelligence layer**. It feeds EVE domain knowledge so she can discuss specific content with authority. This is the Naoma-killer: instead of a cartoon avatar reading a sales script, EVE is a photorealistic human who has consumed your product demos, audiobooks, and research.
|
| 222 |
+
|
| 223 |
+
**Input Fields**:
|
| 224 |
+
|
| 225 |
+
1. **YouTube URL Input** (with Paste button)
|
| 226 |
+
- Paste any YouTube URL. System extracts full transcript via `yt-dlp` + Whisper, key topics with timestamps, visual descriptions of product UI via frame sampling + vision model
|
| 227 |
+
- Injected into Brain's knowledge base as structured context
|
| 228 |
+
- EVE can: "Let me walk you through what was shown at the 3:42 mark of that demo..."
|
| 229 |
+
- **Use case**: Feed product demo video → EVE becomes 24/7 sales agent who discusses every feature like a human colleague who watched the video
|
| 230 |
+
|
| 231 |
+
2. **Audiobook / Media URL Input** (with Upload button)
|
| 232 |
+
- Upload MP3/WAV/M4A or paste media URLs
|
| 233 |
+
- Full transcription via Whisper → semantic chunking → vector store embedding
|
| 234 |
+
- EVE discusses themes, references passages, answers questions about content
|
| 235 |
+
- **Use case**: Feed medical textbook audio → EVE tutors students on any concept from the book
|
| 236 |
+
|
| 237 |
+
3. **Research / Prompt URL**
|
| 238 |
+
- Paste URL to arXiv paper, PDF, web article
|
| 239 |
+
- Fetches content, extracts text, chunks and embeds in RAG store
|
| 240 |
+
- EVE discusses findings, compares methodologies, explains concepts
|
| 241 |
+
- **Use case**: Feed company whitepaper → EVE presents your research as subject-matter expert
|
| 242 |
+
|
| 243 |
+
4. **Natural Language Prompt for Prototyping** (large textarea)
|
| 244 |
+
- Free-form meta-instructions for building new agent behaviors
|
| 245 |
+
- Example: "Create a conversational agent with VASA-1 level realness inspired by the Teller and Soul papers..."
|
| 246 |
+
- Tells the Brain engine how to configure itself. Supports model/paper references
|
| 247 |
+
- **Send Prompt** button fires instruction to Conductor
|
| 248 |
+
|
| 249 |
+
5. **Analyze Media Sources** (gold bar button)
|
| 250 |
+
- Batch processes all ingested media: transcription → chunking → embedding → knowledge graph construction
|
| 251 |
+
- Shows progress and summary of extracted knowledge
|
| 252 |
+
- Once complete, EVE's Brain has the full knowledge base loaded and ready
|
| 253 |
+
|
| 254 |
+
### What Makes EDEN OS Different from Naoma
|
| 255 |
+
|
| 256 |
+
| Capability | Naoma | EDEN OS |
|
| 257 |
+
|-----------|-------|---------|
|
| 258 |
+
| Avatar realism | Cartoon/basic | Photorealistic 4D human (Eden Protocol) |
|
| 259 |
+
| Lip-sync | Basic mouth movement | Phoneme-accurate LivePortrait at 78fps |
|
| 260 |
+
| Knowledge sources | Sales script + KB | YouTube + audiobooks + research papers + live URLs |
|
| 261 |
+
| Interruption handling | Limited | Full KV-Recache protocol (<100ms) |
|
| 262 |
+
| Real-time tuning | None | Live behavioral sliders (expressiveness, eye contact, flirtation) |
|
| 263 |
+
| Voice | Standard TTS | CosyVoice2 zero-shot cloning + emotion routing |
|
| 264 |
+
| Product demos | Script playback | Contextual video discussion with timestamp references |
|
| 265 |
+
| Deployment | Cloud only | Self-hosted on RTX 3090+, data never leaves your machine |
|
| 266 |
+
|
| 267 |
+
---
|
| 268 |
+
|
| 269 |
+
## AGENT TEAM SPECIFICATION
|
| 270 |
+
|
| 271 |
+
This project is built by a team of **7 specialized Claude Code agents** working in parallel. Each agent owns a vertical slice of the system. Agents communicate through shared file interfaces and a central orchestration manifest.
|
| 272 |
+
|
| 273 |
+
---
|
| 274 |
+
|
| 275 |
+
### AGENT 1: GENESIS (Portrait-to-4D Engine)
|
| 276 |
+
|
| 277 |
+
**Role**: Owns the image generation and 4D avatar creation pipeline.
|
| 278 |
+
**Objective**: Convert any 2D image into a temporally-consistent 4D avatar mesh/latent that can be animated in real-time.
|
| 279 |
+
|
| 280 |
+
**Model Stack (ordered by priority)**:
|
| 281 |
+
| Model | Purpose | HF Repo | VRAM | Latency |
|
| 282 |
+
|-------|---------|---------|------|---------|
|
| 283 |
+
| FLUX 1.0 Pro | Portrait generation/enhancement | `black-forest-labs/FLUX.1-pro` | 22GB | 4-6s |
|
| 284 |
+
| FLUX.1-schnell | Fast preview / real-time feedback | `black-forest-labs/FLUX.1-schnell` | 14GB | 1-2s |
|
| 285 |
+
| IP-Adapter FaceID | Identity preservation from upload | `h94/IP-Adapter-FaceID` | 2GB | <1s |
|
| 286 |
+
| RealESRGAN x4 | Background upscale (async) | `ai-forever/Real-ESRGAN` | 1GB | <1s |
|
| 287 |
+
|
| 288 |
+
**Tasks**:
|
| 289 |
+
1. Build `genesis/portrait_engine.py` — accepts uploaded image, runs face detection (MediaPipe or InsightFace), crops and aligns face, generates enhanced portrait via FLUX with IP-Adapter for identity lock
|
| 290 |
+
2. Build `genesis/eden_protocol_validator.py` — implements the 0.3 deviation rule. Extracts micro-features (pores, freckles, beauty marks) from reference and generated images. Rejects and regenerates if deviation exceeds threshold
|
| 291 |
+
3. Build `genesis/latent_encoder.py` — encodes the portrait into the latent space compatible with the animation engine (MuseTalk/HunyuanVideo-Avatar latent format)
|
| 292 |
+
4. Build `genesis/preload_cache.py` — pre-computes the avatar's idle animations (blinks, micro-movements, breathing) so the avatar is alive on page load with ZERO wait time
|
| 293 |
+
|
| 294 |
+
**Critical Constraint**: The portrait must be generated/processed and cached BEFORE the user initiates conversation. The "ready on load" requirement means Genesis runs during the setup phase, not during chat.
|
| 295 |
+
|
| 296 |
+
**File Output**: `genesis/` directory with all modules. Exports a `GenesisEngine` class with methods: `process_upload()`, `generate_portrait()`, `validate_eden_protocol()`, `encode_to_latent()`, `precompute_idle_cache()`
|
| 297 |
+
|
| 298 |
+
---
|
| 299 |
+
|
| 300 |
+
### AGENT 2: VOICE (TTS + Voice Cloning + ASR)
|
| 301 |
+
|
| 302 |
+
**Role**: Owns all audio — speech recognition, voice synthesis, voice cloning, emotion injection.
|
| 303 |
+
**Objective**: Deliver emotionally-aware, sub-200ms TTS with optional voice cloning from 10-second reference audio.
|
| 304 |
+
|
| 305 |
+
**Model Stack**:
|
| 306 |
+
| Model | Purpose | HF Repo | VRAM | Latency |
|
| 307 |
+
|-------|---------|---------|------|---------|
|
| 308 |
+
| Whisper Large v3 Turbo | Real-time ASR | `openai/whisper-large-v3-turbo` | 3GB | <500ms |
|
| 309 |
+
| CosyVoice 2 | Primary TTS + zero-shot cloning | `FunAudioLLM/CosyVoice2-0.5B` | 2GB | <300ms |
|
| 310 |
+
| Kokoro v1.0 | Fallback baseline TTS | `hexgrad/Kokoro-82M` | <1GB | <200ms |
|
| 311 |
+
| StyleTTS2 | Emotional voice cloning (premium) | `yl4579/StyleTTS2` | 4GB | 2-4s |
|
| 312 |
+
| Silero VAD | Voice Activity Detection | `snakers5/silero-vad` | <1GB | <10ms |
|
| 313 |
+
|
| 314 |
+
**Tasks**:
|
| 315 |
+
1. Build `voice/asr_engine.py` — real-time speech-to-text using Whisper with Silero VAD for endpoint detection. Must support streaming (partial transcripts while user is still speaking). Implements the LISTENING state audio pipeline
|
| 316 |
+
2. Build `voice/tts_engine.py` — text-to-speech with CosyVoice 2 as primary (supports zero-shot cloning from 3-10s reference). Kokoro as fallback. Must support streaming output (begin generating audio from first LLM tokens, don't wait for complete response)
|
| 317 |
+
3. Build `voice/voice_cloner.py` — async voice cloning pipeline. Accepts reference audio, extracts voice embedding, stores for future TTS calls. Emotion dict: `{joy, sadness, confidence, urgency, warmth}` each 0.0-1.0
|
| 318 |
+
4. Build `voice/emotion_router.py` — analyzes LLM response text sentiment and automatically adjusts TTS emotion parameters. A medical agent should sound warm and reassuring
|
| 319 |
+
5. Build `voice/interruption_handler.py` — detects when the user begins speaking while the avatar is still talking. Immediately signals the orchestrator to halt TTS generation, flush the audio buffer, and transition to LISTENING state
|
| 320 |
+
|
| 321 |
+
**Critical Constraint**: TTS must begin streaming audio from the FIRST LLM token. Do NOT wait for the full LLM response. This is what makes the avatar feel "alive" — it starts speaking as it thinks, just like a human.
|
| 322 |
+
|
| 323 |
+
**File Output**: `voice/` directory. Exports a `VoiceEngine` class with methods: `start_listening()`, `stop_listening()`, `synthesize_stream()`, `clone_voice()`, `detect_interruption()`
|
| 324 |
+
|
| 325 |
+
---
|
| 326 |
+
|
| 327 |
+
### AGENT 3: ANIMATOR (Lip-Sync + 4D Motion Engine)
|
| 328 |
+
|
| 329 |
+
**Role**: Owns all facial animation — lip-sync, head motion, micro-expressions, idle behavior, and the critical state transitions (LISTENING ↔ THINKING ↔ SPEAKING).
|
| 330 |
+
**Objective**: Generate 60fps photorealistic facial animation driven by audio, synchronized with TTS output, with zero uncanny valley.
|
| 331 |
+
|
| 332 |
+
**Model Stack (Dual-Path Architecture)**:
|
| 333 |
+
|
| 334 |
+
**Path A — Implicit Keypoint Path (PRIMARY, for real-time)**:
|
| 335 |
+
| Model | Purpose | HF Repo | Speed | VRAM |
|
| 336 |
+
|-------|---------|---------|-------|------|
|
| 337 |
+
| LivePortrait | Implicit keypoint extraction + stitching + retargeting | `KwaiVGI/LivePortrait` | 12.8ms/frame (78fps on RTX 4090) | 4GB |
|
| 338 |
+
| LivePortrait Retargeting MLP | Eyes + lips fine control via scalar inputs | Included in LivePortrait | <1ms | Negligible |
|
| 339 |
+
|
| 340 |
+
LivePortrait is the PRIMARY animation engine because:
|
| 341 |
+
- It uses implicit keypoints (compact blendshapes) rather than heavy diffusion, achieving 12.8ms per frame
|
| 342 |
+
- The stitching module seamlessly pastes the animated face back into the original image — no shoulder glitches, no border artifacts
|
| 343 |
+
- Eyes retargeting and lip retargeting modules accept scalar inputs, giving us precise programmatic control over gaze direction and mouth shape
|
| 344 |
+
- Trained on 69 million high-quality frames with mixed image-video strategy — best-in-class generalization across ethnicities, art styles, and lighting
|
| 345 |
+
- The entire pipeline (appearance extractor → motion extractor → warping → decoder → stitching) runs in under 13ms
|
| 346 |
+
|
| 347 |
+
**Path B — Diffusion Path (PREMIUM, for cinematic quality)**:
|
| 348 |
+
| Model | Purpose | HF Repo | Speed | VRAM |
|
| 349 |
+
|-------|---------|---------|-------|------|
|
| 350 |
+
| HunyuanVideo-Avatar | MM-DiT audio-driven animation with emotion control | `tencent/HunyuanVideo-Avatar` | ~2s per clip | 16GB |
|
| 351 |
+
| MuseTalk v2 | Latent-space lip-sync inpainting | `TMElyralab/MuseTalk` | 30fps on V100 | 4GB |
|
| 352 |
+
| Hallo 3 | Diffusion transformer portrait animation | Community | ~3s per clip | 12GB |
|
| 353 |
+
|
| 354 |
+
**Path Selection Logic**:
|
| 355 |
+
- Real-time conversation → Path A (LivePortrait) — always
|
| 356 |
+
- Pre-rendered cinematic content → Path B (HunyuanVideo-Avatar)
|
| 357 |
+
- Fallback if LivePortrait fails → MuseTalk v2
|
| 358 |
+
|
| 359 |
+
**Tasks**:
|
| 360 |
+
1. Build `animator/liveportrait_driver.py` — wraps LivePortrait's inference pipeline. Accepts audio features (from Voice engine) and converts them to implicit keypoint deltas for lip retargeting. Maps phoneme sequences to mouth shapes via the lip retargeting MLP. Handles eye blink injection, gaze direction from user webcam (if available), and natural head sway
|
| 361 |
+
2. Build `animator/idle_generator.py` — generates the LISTENING state idle loop. Uses LivePortrait's retargeting modules to produce natural blinks (every 3-7 seconds, randomized), micro head movements (+/-2 degrees rotation), subtle breathing motion (chest/shoulder rise), and occasional eyebrow micro-raises. This loop runs CONTINUOUSLY when the avatar is not speaking
|
| 362 |
+
3. Build `animator/state_machine.py` — manages transitions between the three states of presence:
|
| 363 |
+
- LISTENING → THINKING: triggered by ASR endpoint detection. Avatar does a subtle inhale, slight brow raise
|
| 364 |
+
- THINKING → SPEAKING: triggered by first TTS audio chunk. Avatar opens mouth, begins lip-sync
|
| 365 |
+
- SPEAKING → LISTENING: triggered by TTS completion or user interruption. Avatar closes mouth, returns to idle loop
|
| 366 |
+
- SPEAKING → LISTENING (INTERRUPT): triggered by `interruption_handler`. Immediate halt, smooth transition back to idle within 100ms using KV-recache technique adapted from LONGLIVE
|
| 367 |
+
4. Build `animator/audio_to_keypoints.py` — the critical bridge between Voice and Animator. Converts audio waveform features (mel spectrogram, pitch, energy) into LivePortrait-compatible implicit keypoint deltas. This replaces the need for a "driving video" — audio becomes the driver
|
| 368 |
+
5. Build `animator/eden_temporal_anchor.py` — implements the temporal consistency system adapted from LONGLIVE's frame sink concept. Always maintains the first frame of each conversation turn as a "global anchor" so the avatar never drifts from its identity over long conversations. Prevents the "latent collapse" phenomenon where AI faces slowly lose their identity
|
| 369 |
+
|
| 370 |
+
**Critical Constraint**: The animator must NEVER produce a frozen frame. Even during model loading or state transitions, the idle loop must continue. The avatar is always alive.
|
| 371 |
+
|
| 372 |
+
**File Output**: `animator/` directory. Exports an `AnimatorEngine` class with methods: `start_idle_loop()`, `drive_from_audio()`, `transition_state()`, `get_current_frame()`, `apply_eden_anchor()`
|
| 373 |
+
|
| 374 |
+
---
|
| 375 |
+
|
| 376 |
+
### AGENT 4: BRAIN (LLM Reasoning + Context Engine)
|
| 377 |
+
|
| 378 |
+
**Role**: Owns the conversational intelligence — LLM integration, system prompts, memory, context management, and persona behavior.
|
| 379 |
+
**Objective**: Deliver context-aware, persona-consistent responses with sub-200ms first-token latency via streaming.
|
| 380 |
+
|
| 381 |
+
**Model Stack (Tiered)**:
|
| 382 |
+
| Model | Purpose | Provider | Latency | Cost |
|
| 383 |
+
|-------|---------|----------|---------|------|
|
| 384 |
+
| Claude Sonnet 4 | Primary reasoning (cloud) | Anthropic API | <150ms first token | $0.003/1K tokens |
|
| 385 |
+
| Qwen 3 8B (GGUF Q4) | Local fallback / offline mode | `Qwen/Qwen3-8B-GGUF` via llama.cpp | <300ms first token | $0 |
|
| 386 |
+
| BitNet b1.58 3B | Ultra-efficient edge mode | `microsoft/BitNet` via llama.cpp | <200ms first token | $0 |
|
| 387 |
+
|
| 388 |
+
**Tiered Selection**:
|
| 389 |
+
- Internet available + API key configured → Claude Sonnet 4 (best quality)
|
| 390 |
+
- Offline or API failure → Qwen 3 8B via llama.cpp (good quality, runs on CPU+GPU)
|
| 391 |
+
- Edge deployment / mobile / low-VRAM → BitNet 3B (acceptable quality, runs on CPU only, frees GPU for animation)
|
| 392 |
+
|
| 393 |
+
**Tasks**:
|
| 394 |
+
1. Build `brain/reasoning_engine.py` — LLM interface with streaming response. Must yield tokens as they arrive (not wait for complete response). Supports both Anthropic API (cloud) and llama.cpp (local). Handles system prompt injection, conversation history, and persona context
|
| 395 |
+
2. Build `brain/persona_manager.py` — loads agent persona from YAML template files. Each persona defines: name, role, tone, knowledge domain, emotional baseline, conversation boundaries. The persona shapes every response
|
| 396 |
+
3. Build `brain/memory_manager.py` — maintains conversation history within session. Implements sliding window context (last 20 turns). Extracts key facts mentioned by user for context persistence. Future: vector DB integration for long-term memory
|
| 397 |
+
4. Build `brain/streaming_bridge.py` — the critical integration point. As LLM tokens stream in, this module:
|
| 398 |
+
- Buffers tokens until a natural speech boundary (sentence end, comma pause, etc.)
|
| 399 |
+
- Sends each buffer to Voice engine for TTS generation
|
| 400 |
+
- Voice engine sends audio chunks to Animator engine for lip-sync
|
| 401 |
+
- Result: the avatar begins speaking within 500ms of the user finishing their question
|
| 402 |
+
5. Build `brain/template_loader.py` — loads and validates agent templates (YAML). Templates define the full agent configuration: persona, voice profile, visual appearance preferences, knowledge base references
|
| 403 |
+
|
| 404 |
+
**Template Schema**:
|
| 405 |
+
```yaml
|
| 406 |
+
# templates/medical_office.yaml
|
| 407 |
+
agent:
|
| 408 |
+
name: "Dr. Rivera's Assistant"
|
| 409 |
+
role: "Medical office receptionist"
|
| 410 |
+
persona:
|
| 411 |
+
tone: warm
|
| 412 |
+
pace: moderate
|
| 413 |
+
formality: professional
|
| 414 |
+
emotional_baseline: {joy: 0.6, confidence: 0.8, warmth: 0.9}
|
| 415 |
+
system_prompt: |
|
| 416 |
+
You are a warm, professional medical office assistant for Dr. Rivera's
|
| 417 |
+
family practice. You help patients schedule appointments, answer general
|
| 418 |
+
questions about office hours and services, and collect basic intake
|
| 419 |
+
information. You are HIPAA-aware and never discuss other patients.
|
| 420 |
+
You speak clearly and reassuringly.
|
| 421 |
+
voice:
|
| 422 |
+
engine: cosyvoice2
|
| 423 |
+
reference_audio: null # uses default warm female voice
|
| 424 |
+
speed: 0.95
|
| 425 |
+
emotion_override: {warmth: 0.9, confidence: 0.7}
|
| 426 |
+
appearance:
|
| 427 |
+
portrait_prompt: "Professional woman, warm smile, medical office background"
|
| 428 |
+
style: photorealistic
|
| 429 |
+
eden_protocol: strict
|
| 430 |
+
knowledge_base:
|
| 431 |
+
- office_hours.md
|
| 432 |
+
- services.md
|
| 433 |
+
- insurance_accepted.md
|
| 434 |
+
```
|
| 435 |
+
|
| 436 |
+
**File Output**: `brain/` directory. Exports a `BrainEngine` class with methods: `reason_stream()`, `load_persona()`, `get_context()`, `process_user_input()`
|
| 437 |
+
|
| 438 |
+
---
|
| 439 |
+
|
| 440 |
+
### AGENT 5: CONDUCTOR (Pipeline Orchestrator + State Manager)
|
| 441 |
+
|
| 442 |
+
**Role**: Owns the end-to-end orchestration — connects all engines, manages data flow, handles errors, enforces latency budgets, and serves as the single entry point for the system.
|
| 443 |
+
**Objective**: Orchestrate the full pipeline from user input to avatar video output in under 5 seconds total, with the avatar appearing alive and responsive at all times.
|
| 444 |
+
|
| 445 |
+
**Tasks**:
|
| 446 |
+
1. Build `conductor/orchestrator.py` — the master controller. Implements the full pipeline.
|
| 447 |
+
2. Build `conductor/latency_enforcer.py` — monitors each pipeline stage and enforces latency budgets.
|
| 448 |
+
3. Build `conductor/error_recovery.py` — handles failures gracefully.
|
| 449 |
+
4. Build `conductor/session_manager.py` — manages the lifecycle of a conversation session.
|
| 450 |
+
5. Build `conductor/metrics_collector.py` — collects real-time performance metrics.
|
| 451 |
+
|
| 452 |
+
**File Output**: `conductor/` directory. Exports a `Conductor` class as the single entry point: `Conductor(config).create_session().start_conversation()`
|
| 453 |
+
|
| 454 |
+
---
|
| 455 |
+
|
| 456 |
+
### AGENT 6: GATEWAY (WebRTC Server + API Layer)
|
| 457 |
+
|
| 458 |
+
**Role**: Owns the network layer — WebRTC signaling, video/audio streaming, REST API for session management, and the frontend connection.
|
| 459 |
+
**Objective**: Stream the avatar video to the user's browser at 60fps with sub-500ms latency, handle audio input capture, and provide a clean API for session lifecycle.
|
| 460 |
+
|
| 461 |
+
**Tasks**:
|
| 462 |
+
1. Build `gateway/api_server.py` — FastAPI application with all endpoints from API Contract
|
| 463 |
+
2. Build `gateway/webrtc_handler.py` — WebRTC signaling and media transport
|
| 464 |
+
3. Build `gateway/audio_capture.py` — processes incoming WebRTC audio
|
| 465 |
+
4. Build `gateway/video_encoder.py` — encodes animator output frames to streamable video
|
| 466 |
+
5. Build `gateway/websocket_handler.py` — WebSocket fallback streaming
|
| 467 |
+
|
| 468 |
+
**File Output**: `gateway/` directory. Exports a `GatewayServer` class with method: `start(host, port)` that boots the entire API + WebRTC server
|
| 469 |
+
|
| 470 |
+
---
|
| 471 |
+
|
| 472 |
+
### AGENT 7: SCHOLAR (Knowledge Engine + Media Ingestion)
|
| 473 |
+
|
| 474 |
+
**Role**: Owns all knowledge ingestion — YouTube transcription, audiobook processing, research paper parsing, URL scraping, RAG vector store, and the knowledge graph that makes EVE an expert on any topic you feed her.
|
| 475 |
+
**Objective**: Transform any media source (video, audio, text, URL) into structured knowledge that the Brain engine can retrieve during conversation, with citation-level accuracy.
|
| 476 |
+
|
| 477 |
+
**Tasks**:
|
| 478 |
+
1. Build `scholar/youtube_ingestor.py` — the YouTube knowledge pipeline
|
| 479 |
+
2. Build `scholar/audiobook_ingestor.py` — audiobook and media processing
|
| 480 |
+
3. Build `scholar/url_ingestor.py` — web and research paper ingestion
|
| 481 |
+
4. Build `scholar/knowledge_graph.py` — connects ingested knowledge
|
| 482 |
+
5. Build `scholar/rag_retriever.py` — the retrieval interface for the Brain
|
| 483 |
+
6. Build `scholar/media_analyzer.py` — the "Analyze Media Sources" button handler
|
| 484 |
+
|
| 485 |
+
**File Output**: `scholar/` directory. Exports a `ScholarEngine` class with methods: `ingest_youtube()`, `ingest_audiobook()`, `ingest_url()`, `analyze_all()`, `retrieve()`, `get_knowledge_summary()`
|
| 486 |
+
|
| 487 |
+
---
|
| 488 |
+
|
| 489 |
+
## MODEL PRIORITY MATRIX
|
| 490 |
+
|
| 491 |
+
```
|
| 492 |
+
| HARDWARE TIER | ANIMATION | TTS | LLM |
|
| 493 |
+
|-------------------|-----------------|---------------|----------|
|
| 494 |
+
| H100 (80GB) | HunyuanAvatar | CosyVoice2 | Claude |
|
| 495 |
+
| RTX 4090 (24GB) | LivePortrait | CosyVoice2 | Claude |
|
| 496 |
+
| RTX 3090 (24GB) | LivePortrait | Kokoro | Qwen3 8B |
|
| 497 |
+
| L4 (24GB) | LivePortrait | Kokoro | Claude |
|
| 498 |
+
| CPU Only | LivePortrait | Kokoro | BitNet 3B|
|
| 499 |
+
```
|
| 500 |
+
|
| 501 |
+
---
|
| 502 |
+
|
| 503 |
+
## ENVIRONMENT AND DEPENDENCIES
|
| 504 |
+
|
| 505 |
+
### requirements.txt
|
| 506 |
+
```
|
| 507 |
+
# Core
|
| 508 |
+
fastapi==0.115.0
|
| 509 |
+
uvicorn==0.30.0
|
| 510 |
+
websockets==12.0
|
| 511 |
+
pydantic==2.9.0
|
| 512 |
+
|
| 513 |
+
# ML / Inference
|
| 514 |
+
torch==2.4.0
|
| 515 |
+
torchaudio==2.4.0
|
| 516 |
+
torchvision==0.19.0
|
| 517 |
+
transformers==4.45.0
|
| 518 |
+
diffusers==0.31.0
|
| 519 |
+
accelerate==0.34.0
|
| 520 |
+
safetensors==0.4.5
|
| 521 |
+
huggingface-hub==0.25.0
|
| 522 |
+
|
| 523 |
+
# LivePortrait dependencies
|
| 524 |
+
insightface==0.7.3
|
| 525 |
+
onnxruntime-gpu==1.19.0
|
| 526 |
+
mediapipe==0.10.14
|
| 527 |
+
|
| 528 |
+
# Voice
|
| 529 |
+
openai-whisper==20231117
|
| 530 |
+
silero-vad==5.1
|
| 531 |
+
|
| 532 |
+
# WebRTC
|
| 533 |
+
aiortc==1.9.0
|
| 534 |
+
|
| 535 |
+
# Image/Video processing
|
| 536 |
+
opencv-python-headless==4.10.0
|
| 537 |
+
Pillow==10.4.0
|
| 538 |
+
numpy==1.26.4
|
| 539 |
+
scipy==1.14.0
|
| 540 |
+
scikit-image==0.24.0
|
| 541 |
+
|
| 542 |
+
# Utilities
|
| 543 |
+
pyyaml==6.0.2
|
| 544 |
+
anthropic==0.34.0
|
| 545 |
+
loguru==0.7.2
|
| 546 |
+
|
| 547 |
+
# Scholar / Knowledge Engine (Agent 7)
|
| 548 |
+
yt-dlp==2024.10.22
|
| 549 |
+
chromadb==0.5.5
|
| 550 |
+
sentence-transformers==3.1.0
|
| 551 |
+
trafilatura==1.12.0
|
| 552 |
+
pymupdf==1.24.10
|
| 553 |
+
```
|
| 554 |
+
|
| 555 |
+
---
|
| 556 |
+
|
| 557 |
+
**OWN THE SCIENCE.**
|
| 558 |
+
**EDEN OS v1.0 — Phase One**
|
| 559 |
+
**Built by Amanda + 7 Claude Code Agents**
|
Dockerfile
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CUDA runtime base (no compiler toolchain) — app only needs to run
# prebuilt wheels, matching onnxruntime-gpu / torch CUDA 12.x builds.
FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04

# Prevent interactive prompts
ENV DEBIAN_FRONTEND=noninteractive

# System dependencies
# - ffmpeg: audio/video transcoding for the voice/animator pipelines
# - git + git-lfs: pulling large model weights from model repos
# - libgl1-mesa-glx / libglib2.0-0: OpenCV runtime shared libraries
RUN apt-get update && apt-get install -y \
    python3.11 python3.11-venv python3-pip \
    ffmpeg git git-lfs \
    libgl1-mesa-glx libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

# Set python3.11 as default
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1

WORKDIR /app

# Install Python dependencies
# (requirements.txt copied before the app code so this layer is cached
# across source-only changes)
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Create necessary directories
# models_cache: downloaded weights; data/chromadb: Scholar vector store
RUN mkdir -p models_cache data/chromadb templates static

# HuggingFace Spaces expects port 7860
EXPOSE 7860

# Runtime defaults — all overridable at `docker run` time (see app.py)
ENV EDEN_PORT=7860
ENV EDEN_HOST=0.0.0.0
ENV EDEN_HARDWARE_PROFILE=auto
ENV EDEN_MODELS_CACHE=/app/models_cache
ENV EDEN_LOG_LEVEL=INFO

CMD ["python3", "app.py"]
|
app.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS — Entry Point
|
| 3 |
+
Boots the full operating system and serves:
|
| 4 |
+
- EDEN Studio frontend at /
|
| 5 |
+
- EDEN OS API at /api/v1/
|
| 6 |
+
- WebSocket stream at /api/v1/sessions/{id}/stream
|
| 7 |
+
|
| 8 |
+
Deploy to HuggingFace Spaces as Docker SDK.
|
| 9 |
+
Space ID: AIBRUH/eden-os
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import os
|
| 13 |
+
import sys
|
| 14 |
+
import time
|
| 15 |
+
from pathlib import Path
|
| 16 |
+
|
| 17 |
+
import yaml
|
| 18 |
+
import uvicorn
|
| 19 |
+
from loguru import logger
|
| 20 |
+
|
| 21 |
+
# Configure loguru
|
| 22 |
+
logger.remove()
|
| 23 |
+
logger.add(
|
| 24 |
+
sys.stderr,
|
| 25 |
+
format="<green>{time:HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan> — <level>{message}</level>",
|
| 26 |
+
level=os.getenv("EDEN_LOG_LEVEL", "INFO"),
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def detect_hardware() -> dict:
    """Auto-detect GPU hardware and select a matching profile name.

    Returns:
        dict with keys ``gpu`` (device name or "none"), ``vram_gb``
        (float, 0 when no GPU), ``gpu_available`` (bool), and
        ``profile`` (file stem under config/hardware_profiles/).
        Any failure (torch missing, no CUDA, driver error) falls back
        to the CPU profile rather than aborting boot.
    """
    try:
        import torch
        if torch.cuda.is_available():
            gpu_name = torch.cuda.get_device_name(0)
            # Bug fix: the property is `total_memory`, not `total_mem`.
            # The old attribute raised AttributeError, which the broad
            # except swallowed — so GPU machines always got "cpu_edge".
            vram_bytes = torch.cuda.get_device_properties(0).total_memory
            vram_gb = vram_bytes / (1024 ** 3)

            if "H100" in gpu_name or "A100" in gpu_name:
                profile = "h100_cinematic"
            elif "4090" in gpu_name:
                profile = "rtx4090_production"
            elif "3090" in gpu_name or "3080" in gpu_name:
                profile = "rtx3090_standard"
            elif "L4" in gpu_name or "T4" in gpu_name:
                profile = "l4_cloud"
            else:
                profile = "l4_cloud"  # default GPU profile for unknown cards

            return {
                "gpu": gpu_name,
                "vram_gb": round(vram_gb, 1),
                "gpu_available": True,
                "profile": profile,
            }
    except Exception:
        # Best-effort detection: fall through to CPU profile below.
        pass

    return {
        "gpu": "none",
        "vram_gb": 0,
        "gpu_available": False,
        "profile": "cpu_edge",
    }
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def load_config(hardware_profile: str = "auto", models_cache: str = "models_cache") -> dict:
    """Load default.yaml and overlay the selected hardware profile.

    Args:
        hardware_profile: Profile name under config/hardware_profiles/
            (without the .yaml extension), or "auto" to skip the overlay
            (boot() resolves "auto" via detect_hardware() before calling).
        models_cache: Path recorded in the config under "models_cache".

    Returns:
        The merged configuration dict. Missing or empty files yield an
        empty base rather than an error.
    """
    config_dir = Path(__file__).parent / "config"

    # Load default config.
    # Robustness fix: yaml.safe_load returns None for an empty document,
    # which would crash the merge/key assignments below — coerce to {}.
    default_path = config_dir / "default.yaml"
    config: dict = {}
    if default_path.exists():
        with open(default_path) as f:
            config = yaml.safe_load(f) or {}

    # Load hardware profile (profile values override defaults).
    if hardware_profile != "auto":
        profile_path = config_dir / "hardware_profiles" / f"{hardware_profile}.yaml"
        if profile_path.exists():
            with open(profile_path) as f:
                profile = yaml.safe_load(f) or {}
            _deep_merge(config, profile)

    config["models_cache"] = models_cache
    config["hardware_profile"] = hardware_profile
    return config
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def _deep_merge(base: dict, override: dict) -> dict:
|
| 94 |
+
"""Deep merge override into base."""
|
| 95 |
+
for key, value in override.items():
|
| 96 |
+
if key in base and isinstance(base[key], dict) and isinstance(value, dict):
|
| 97 |
+
_deep_merge(base[key], value)
|
| 98 |
+
else:
|
| 99 |
+
base[key] = value
|
| 100 |
+
return base
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def boot():
    """Boot EDEN OS: detect hardware, load config, build the FastAPI app.

    Returns:
        (app, host, port) — the configured FastAPI application plus the
        host/port it should be served on. Runs once at module import.
    """
    start_time = time.monotonic()

    logger.info("=" * 55)
    logger.info(" EDEN OS v1.0 — BOOTING")
    logger.info("=" * 55)

    # Step 1: Detect hardware (env var overrides auto-detection)
    hw = detect_hardware()
    hardware_profile = os.getenv("EDEN_HARDWARE_PROFILE", "auto")
    if hardware_profile == "auto":
        hardware_profile = hw["profile"]
    logger.info(f"Hardware: {hw['gpu']} — Profile: {hardware_profile}")

    # Step 2: Load configuration (defaults merged with the profile)
    models_cache = os.getenv("EDEN_MODELS_CACHE", "models_cache")
    config = load_config(hardware_profile, models_cache)

    # Step 3: Create FastAPI app. Import is deferred so the heavy
    # gateway dependencies only load after config/hardware are resolved.
    from eden_os.gateway import create_app
    host = os.getenv("EDEN_HOST", "0.0.0.0")
    port = int(os.getenv("EDEN_PORT", "7860"))

    app = create_app(
        host=host,
        port=port,
        hardware_profile=hardware_profile,
        models_cache=models_cache,
    )

    # Store config and hardware info on app state for request handlers
    app.state.config = config
    app.state.hardware = hw

    boot_time = time.monotonic() - start_time

    # Fix: report the configured threshold instead of a hard-coded 0.3,
    # so the banner stays truthful if pipeline config changes.
    threshold = config.get("pipeline", {}).get("eden_protocol_threshold", 0.3)

    logger.info("=" * 55)
    logger.info(" EDEN OS v1.0 — LIVE")
    logger.info(f" URL: http://{host}:{port}")
    logger.info(f" API: http://{host}:{port}/api/v1/docs")
    logger.info(f" Hardware: {hw['gpu']} — Profile: {hardware_profile}")
    logger.info(f" Eden Protocol: ACTIVE — Threshold: {threshold}")
    logger.info(f" Boot time: {boot_time:.1f}s")
    logger.info(" OWN THE SCIENCE.")
    logger.info("=" * 55)

    return app, host, port
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
# Create app at module level for uvicorn import
# (supports `uvicorn app:app` — boot() runs once at import time, which
# means hardware detection and config loading also happen on import)
app, _host, _port = boot()

if __name__ == "__main__":
    # Direct invocation: serve the ASGI app ourselves.
    uvicorn.run(
        app,
        host=_host,
        port=_port,
        # 16MB cap on individual WebSocket messages (frame payloads)
        ws_max_size=16 * 1024 * 1024,
        log_level="info",
    )
|
config/default.yaml
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# EDEN OS — Default Configuration
|
| 2 |
+
# Auto-detected hardware profile overrides these values
|
| 3 |
+
|
| 4 |
+
eden_os:
|
| 5 |
+
version: "1.0.0"
|
| 6 |
+
codename: "OWN THE SCIENCE"
|
| 7 |
+
|
| 8 |
+
server:
|
| 9 |
+
host: "0.0.0.0"
|
| 10 |
+
port: 7860
|
| 11 |
+
ws_max_size: 16777216 # 16MB
|
| 12 |
+
|
| 13 |
+
pipeline:
|
| 14 |
+
target_fps: 30
|
| 15 |
+
max_latency_ms: 1500
|
| 16 |
+
eden_protocol_threshold: 0.3
|
| 17 |
+
|
| 18 |
+
defaults:
|
| 19 |
+
consistency: 0.7
|
| 20 |
+
latency: 1.0
|
| 21 |
+
expressiveness: 0.6
|
| 22 |
+
voice_tone: 0.85
|
| 23 |
+
eye_contact: 0.5
|
| 24 |
+
flirtation: 0.15
|
| 25 |
+
|
| 26 |
+
brain:
|
| 27 |
+
primary: "anthropic"
|
| 28 |
+
model: "claude-sonnet-4-20250514"
|
| 29 |
+
fallback: "local"
|
| 30 |
+
local_model: "Qwen/Qwen3-8B-GGUF"
|
| 31 |
+
max_tokens: 1024
|
| 32 |
+
temperature: 0.7
|
| 33 |
+
memory_window: 20
|
| 34 |
+
|
| 35 |
+
voice:
|
| 36 |
+
asr_model: "openai/whisper-large-v3-turbo"
|
| 37 |
+
asr_fallback: "base"
|
| 38 |
+
tts_primary: "kokoro"
|
| 39 |
+
tts_fallback: "silent"
|
| 40 |
+
sample_rate: 16000
|
| 41 |
+
vad_threshold: 0.02
|
| 42 |
+
|
| 43 |
+
animator:
|
| 44 |
+
engine: "liveportrait"
|
| 45 |
+
fallback: "procedural"
|
| 46 |
+
target_fps: 30
|
| 47 |
+
idle_blink_interval: [3.0, 7.0]
|
| 48 |
+
breath_cycle_seconds: 4.0
|
| 49 |
+
|
| 50 |
+
genesis:
|
| 51 |
+
face_detection: "mediapipe"
|
| 52 |
+
portrait_size: 512
|
| 53 |
+
eden_protocol_threshold: 0.3
|
| 54 |
+
|
| 55 |
+
scholar:
|
| 56 |
+
embedding_model: "sentence-transformers/all-MiniLM-L6-v2"
|
| 57 |
+
vector_store: "chromadb"
|
| 58 |
+
chunk_size: 500
|
| 59 |
+
chunk_overlap: 50
|
| 60 |
+
chromadb_path: "data/chromadb"
|
| 61 |
+
|
| 62 |
+
models_cache: "models_cache"
|
config/eden_protocol.yaml
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# EDEN OS — Eden Protocol Configuration
|
| 2 |
+
# Skin texture fidelity validation thresholds
|
| 3 |
+
|
| 4 |
+
eden_protocol:
|
| 5 |
+
# Maximum allowed standard deviation between generated and reference
|
| 6 |
+
threshold: 0.3
|
| 7 |
+
|
| 8 |
+
# Gabor filter bank parameters
|
| 9 |
+
gabor:
|
| 10 |
+
orientations: [0, 45, 90, 135] # degrees
|
| 11 |
+
frequencies: [0.1, 0.25, 0.4]
|
| 12 |
+
sigma: 3.0
|
| 13 |
+
kernel_size: 21
|
| 14 |
+
|
| 15 |
+
# Color space analysis
|
| 16 |
+
color_space: "LAB"
|
| 17 |
+
|
| 18 |
+
# Face region extraction
|
| 19 |
+
face_padding: 0.2 # padding around detected face bbox
|
| 20 |
+
|
| 21 |
+
# Validation strictness levels
|
| 22 |
+
levels:
|
| 23 |
+
strict: 0.2
|
| 24 |
+
standard: 0.3
|
| 25 |
+
relaxed: 0.5
|
config/hardware_profiles/cpu_edge.yaml
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# EDEN OS — CPU Edge/Demo Profile (No GPU)
|
| 2 |
+
hardware:
|
| 3 |
+
name: "CPU Edge/Demo"
|
| 4 |
+
gpu: "none"
|
| 5 |
+
vram_gb: 0
|
| 6 |
+
|
| 7 |
+
pipeline:
|
| 8 |
+
target_fps: 15
|
| 9 |
+
max_latency_ms: 3000
|
| 10 |
+
|
| 11 |
+
animator:
|
| 12 |
+
engine: "liveportrait"
|
| 13 |
+
target_fps: 15
|
| 14 |
+
|
| 15 |
+
voice:
|
| 16 |
+
tts_primary: "kokoro"
|
| 17 |
+
asr_fallback: "base"
|
| 18 |
+
|
| 19 |
+
brain:
|
| 20 |
+
primary: "anthropic"
|
| 21 |
+
fallback: "local"
|
config/hardware_profiles/h100_cinematic.yaml
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# EDEN OS — H100 Cinematic Profile (80GB VRAM)
|
| 2 |
+
hardware:
|
| 3 |
+
name: "H100 Cinematic"
|
| 4 |
+
gpu: "H100"
|
| 5 |
+
vram_gb: 80
|
| 6 |
+
|
| 7 |
+
pipeline:
|
| 8 |
+
target_fps: 60
|
| 9 |
+
max_latency_ms: 1000
|
| 10 |
+
|
| 11 |
+
animator:
|
| 12 |
+
engine: "liveportrait"
|
| 13 |
+
fallback: "hunyuan"
|
| 14 |
+
target_fps: 60
|
| 15 |
+
|
| 16 |
+
voice:
|
| 17 |
+
tts_primary: "cosyvoice2"
|
| 18 |
+
tts_fallback: "styletts2"
|
| 19 |
+
|
| 20 |
+
brain:
|
| 21 |
+
primary: "anthropic"
|
config/hardware_profiles/l4_cloud.yaml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# EDEN OS — L4 Cloud GPU Profile (24GB VRAM, HuggingFace Spaces)
|
| 2 |
+
hardware:
|
| 3 |
+
name: "L4 Cloud GPU"
|
| 4 |
+
gpu: "L4"
|
| 5 |
+
vram_gb: 24
|
| 6 |
+
|
| 7 |
+
pipeline:
|
| 8 |
+
target_fps: 30
|
| 9 |
+
max_latency_ms: 1500
|
| 10 |
+
|
| 11 |
+
animator:
|
| 12 |
+
engine: "liveportrait"
|
| 13 |
+
target_fps: 30
|
| 14 |
+
|
| 15 |
+
voice:
|
| 16 |
+
tts_primary: "kokoro"
|
| 17 |
+
|
| 18 |
+
brain:
|
| 19 |
+
primary: "anthropic"
|
config/hardware_profiles/rtx3090_standard.yaml
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# EDEN OS — RTX 3090 Standard Profile (24GB VRAM)
|
| 2 |
+
hardware:
|
| 3 |
+
name: "RTX 3090 Standard"
|
| 4 |
+
gpu: "RTX 3090"
|
| 5 |
+
vram_gb: 24
|
| 6 |
+
|
| 7 |
+
pipeline:
|
| 8 |
+
target_fps: 30
|
| 9 |
+
max_latency_ms: 1800
|
| 10 |
+
|
| 11 |
+
animator:
|
| 12 |
+
engine: "liveportrait"
|
| 13 |
+
target_fps: 30
|
| 14 |
+
|
| 15 |
+
voice:
|
| 16 |
+
tts_primary: "kokoro"
|
| 17 |
+
|
| 18 |
+
brain:
|
| 19 |
+
primary: "local"
|
| 20 |
+
local_model: "Qwen/Qwen3-8B-GGUF"
|
config/hardware_profiles/rtx4090_production.yaml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# EDEN OS — RTX 4090 Production Profile (24GB VRAM)
|
| 2 |
+
hardware:
|
| 3 |
+
name: "RTX 4090 Production"
|
| 4 |
+
gpu: "RTX 4090"
|
| 5 |
+
vram_gb: 24
|
| 6 |
+
|
| 7 |
+
pipeline:
|
| 8 |
+
target_fps: 60
|
| 9 |
+
max_latency_ms: 1300
|
| 10 |
+
|
| 11 |
+
animator:
|
| 12 |
+
engine: "liveportrait"
|
| 13 |
+
target_fps: 60
|
| 14 |
+
|
| 15 |
+
voice:
|
| 16 |
+
tts_primary: "cosyvoice2"
|
| 17 |
+
|
| 18 |
+
brain:
|
| 19 |
+
primary: "anthropic"
|
docker-compose.yml
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version: '3.8'
|
| 2 |
+
services:
|
| 3 |
+
eden-os:
|
| 4 |
+
build: .
|
| 5 |
+
ports:
|
| 6 |
+
- "7860:7860"
|
| 7 |
+
environment:
|
| 8 |
+
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
|
| 9 |
+
- HF_TOKEN=${HF_TOKEN}
|
| 10 |
+
- EDEN_HARDWARE_PROFILE=auto
|
| 11 |
+
- EDEN_LOG_LEVEL=INFO
|
| 12 |
+
volumes:
|
| 13 |
+
- ./models_cache:/app/models_cache
|
| 14 |
+
- ./templates:/app/templates
|
| 15 |
+
- ./data:/app/data
|
| 16 |
+
deploy:
|
| 17 |
+
resources:
|
| 18 |
+
reservations:
|
| 19 |
+
devices:
|
| 20 |
+
- driver: nvidia
|
| 21 |
+
count: 1
|
| 22 |
+
capabilities: [gpu]
|
eden_os/__init__.py
ADDED
|
File without changes
|
eden_os/animator/__init__.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""EDEN OS — Animator Engine (Agent 3)"""
|
| 2 |
+
from eden_os.animator.animator_engine import AnimatorEngine
|
| 3 |
+
|
| 4 |
+
__all__ = ["AnimatorEngine"]
|
eden_os/animator/animator_engine.py
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS — Animator Engine
|
| 3 |
+
Main engine composing LivePortrait driver, idle generator, state machine,
|
| 4 |
+
audio-to-keypoints bridge, and temporal anchor.
|
| 5 |
+
Generates 60fps photorealistic facial animation driven by audio.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import asyncio
|
| 9 |
+
import time
|
| 10 |
+
from typing import AsyncIterator, Optional
|
| 11 |
+
|
| 12 |
+
import numpy as np
|
| 13 |
+
from loguru import logger
|
| 14 |
+
|
| 15 |
+
from eden_os.shared.types import AudioChunk, AvatarState, VideoFrame
|
| 16 |
+
from eden_os.shared.interfaces import IAnimatorEngine
|
| 17 |
+
from eden_os.animator.liveportrait_driver import LivePortraitDriver
|
| 18 |
+
from eden_os.animator.idle_generator import IdleGenerator
|
| 19 |
+
from eden_os.animator.state_machine import AvatarStateMachine
|
| 20 |
+
from eden_os.animator.audio_to_keypoints import AudioToKeypoints
|
| 21 |
+
from eden_os.animator.eden_temporal_anchor import EdenTemporalAnchor
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class AnimatorEngine(IAnimatorEngine):
    """
    Agent 3: Lip-Sync + 4D Motion Engine.
    Generates real-time facial animation driven by audio.

    Composes five collaborators:
      - LivePortraitDriver: renders frames from implicit keypoints
      - IdleGenerator: produces blink/breath/sway motion while listening
      - AvatarStateMachine: tracks LISTENING/SPEAKING transitions
      - AudioToKeypoints: converts raw audio chunks into keypoint parameters
      - EdenTemporalAnchor: blends rendered frames toward an identity anchor
    """

    def __init__(self, models_cache: str = "models_cache/liveportrait", fps: float = 30.0):
        # Collaborators (see class docstring for roles).
        self.driver = LivePortraitDriver(models_cache=models_cache)
        self.idle_gen = IdleGenerator(fps=fps)
        self.state_machine = AvatarStateMachine()
        self.audio_bridge = AudioToKeypoints()
        self.temporal_anchor = EdenTemporalAnchor()

        self._source_image: Optional[np.ndarray] = None   # portrait set by initialize()
        self._is_initialized = False                      # guards the generator methods below
        self._current_frame: Optional[VideoFrame] = None  # last frame yielded by any loop
        self._idle_task: Optional[asyncio.Task] = None    # NOTE(review): never assigned in this class — confirm it is still needed
        self._conversation_turn = 0                       # advances per finished turn; drives anchor decay
        self.fps = fps

    async def initialize(self, portrait: np.ndarray) -> None:
        """Initialize the animator with a source portrait.

        Loads the LivePortrait models, registers ``portrait`` as both the
        render source and the identity anchor, then marks the engine ready.
        """
        await self.driver.load_models()
        self.driver.set_source_image(portrait)
        self._source_image = portrait
        self.temporal_anchor.set_anchor(portrait)
        self._is_initialized = True
        logger.info("Animator engine initialized")

    async def start_idle_loop(self, profile: dict) -> AsyncIterator[VideoFrame]:
        """Generate continuous idle animation frames.

        Args:
            profile: Hardware/behavior profile. NOTE(review): unused in this
                method — confirm whether it should configure ``idle_gen``.

        Yields:
            VideoFrame: anchor-stabilized idle frames (LISTENING state).

        Raises:
            RuntimeError: if ``initialize()`` has not completed.
        """
        if not self._is_initialized:
            raise RuntimeError("Animator not initialized. Call initialize() first.")

        await self.state_machine.transition_to(AvatarState.LISTENING)

        async for frame in self.idle_gen.generate_idle_frames(
            source_image=self._source_image,
            base_keypoints=self.driver.source_keypoints,
            render_fn=self.driver.render_frame,
        ):
            # Apply temporal anchor for identity preservation
            frame.pixels = self.temporal_anchor.stabilize_frame(
                frame.pixels, self._conversation_turn
            )
            self._current_frame = frame
            yield frame

    async def drive_from_audio(
        self, audio: AsyncIterator[AudioChunk]
    ) -> AsyncIterator[VideoFrame]:
        """Generate lip-synced animation driven by audio stream.

        One frame is produced per audio chunk; pacing is throttled to
        ``self.fps``. A chunk flagged ``is_final`` ends the turn: the
        conversation counter advances and the state machine returns to
        LISTENING (the loop keeps consuming any remaining chunks).

        Raises:
            RuntimeError: if ``initialize()`` has not completed.
        """
        if not self._is_initialized:
            raise RuntimeError("Animator not initialized. Call initialize() first.")

        await self.state_machine.transition_to(AvatarState.SPEAKING)
        frame_interval = 1.0 / self.fps

        async for chunk in audio:
            frame_start = time.monotonic()

            # Extract audio features and convert to keypoint params
            audio_features = self.audio_bridge.process_audio_chunk(chunk.data)

            # Get emotion from chunk if available (optional attribute)
            emotion = getattr(chunk, "emotion", None)

            # Apply to LivePortrait keypoints
            keypoints = self.driver.apply_audio_keypoints(audio_features, emotion)

            # Get transition blend if we're mid-transition
            blend = self.state_machine.get_animation_blend()
            if blend.get("is_transitioning"):
                # Blend with idle keypoints during transition.
                # NOTE(review): the elapsed value passed below is time since
                # frame_start (near zero each frame); confirm it should not be
                # the idle loop's own elapsed clock instead.
                idle_deltas = self.idle_gen.get_idle_keypoint_deltas(
                    time.monotonic() - frame_start
                )
                idle_kp = self.idle_gen.apply_idle_to_keypoints(
                    self.driver.source_keypoints, idle_deltas
                )
                factor = blend["blend_factor"]
                keypoints = keypoints * factor + idle_kp * (1 - factor)

            # Render frame
            pixels = self.driver.render_frame(keypoints)

            # Apply temporal anchor for identity preservation
            pixels = self.temporal_anchor.stabilize_frame(
                pixels, self._conversation_turn
            )

            frame = VideoFrame(
                pixels=pixels,
                timestamp_ms=time.monotonic() * 1000,
                state=AvatarState.SPEAKING,
                eden_score=1.0,
            )
            self._current_frame = frame
            yield frame

            # If this is the final audio chunk, transition back to listening
            if chunk.is_final:
                self._conversation_turn += 1
                await self.state_machine.transition_to(AvatarState.LISTENING)

            # Maintain frame rate: sleep off the unused part of the budget
            elapsed = time.monotonic() - frame_start
            if elapsed < frame_interval:
                await asyncio.sleep(frame_interval - elapsed)

    async def transition_state(
        self, from_state: AvatarState, to_state: AvatarState
    ) -> None:
        """Transition between avatar states.

        A SPEAKING -> LISTENING change is treated as a user interruption:
        the audio bridge smoothing state is reset and the conversation
        turn counter advances.
        """
        interrupt = (
            from_state == AvatarState.SPEAKING
            and to_state == AvatarState.LISTENING
        )
        await self.state_machine.transition_to(to_state, interrupt=interrupt)

        if interrupt:
            self.audio_bridge.reset()
            self._conversation_turn += 1

    async def get_current_frame(self) -> VideoFrame:
        """Get the most recently rendered frame.

        Fallback order: last rendered frame -> raw source portrait ->
        black 512x512 placeholder (IDLE state, eden_score 0.0).
        """
        if self._current_frame is not None:
            return self._current_frame

        # Return a default frame if nothing has been rendered yet
        if self._source_image is not None:
            return VideoFrame(
                pixels=self._source_image,
                timestamp_ms=time.monotonic() * 1000,
                state=self.state_machine.state,
                eden_score=1.0,
            )

        return VideoFrame(
            pixels=np.zeros((512, 512, 3), dtype=np.uint8),
            timestamp_ms=time.monotonic() * 1000,
            state=AvatarState.IDLE,
            eden_score=0.0,
        )

    def apply_eden_anchor(self, frame: np.ndarray) -> np.ndarray:
        """Apply Eden temporal anchor stabilization to a single frame."""
        return self.temporal_anchor.stabilize_frame(frame, self._conversation_turn)

    def update_settings(self, settings: dict) -> None:
        """Update animator settings from admin panel sliders.

        All settings are forwarded to the driver; "eye_contact" additionally
        sets the driver's gaze lock strength directly.
        """
        self.driver.update_settings(settings)

        if "eye_contact" in settings:
            self.driver.gaze_lock = float(settings["eye_contact"])

    def get_status(self) -> dict:
        """Get animator status snapshot for monitoring endpoints."""
        return {
            "initialized": self._is_initialized,
            "state": self.state_machine.state.value,
            "conversation_turn": self._conversation_turn,
            "frame_count": self.driver.frame_count,
            "fps": self.fps,
            "temporal_anchor": self.temporal_anchor.get_status(),
            "state_info": self.state_machine.get_state_info(),
        }
|
eden_os/animator/audio_to_keypoints.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS — Audio-to-Keypoints Bridge
|
| 3 |
+
Converts audio waveform features into LivePortrait-compatible implicit keypoint deltas.
|
| 4 |
+
This replaces the need for a driving video — audio becomes the driver.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
from loguru import logger
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class AudioToKeypoints:
    """Converts audio features to facial keypoint deltas for lip-sync animation.

    The extractor is intentionally lightweight (RMS energy, autocorrelation
    pitch, FFT spectral centroid) so it can run per-chunk in real time.
    Energy/pitch histories provide short moving-average smoothing across
    chunks; call reset() between conversation turns.
    """

    def __init__(self, sample_rate: int = 16000):
        """
        Args:
            sample_rate: PCM sample rate of incoming chunks, in Hz.
        """
        self.sample_rate = sample_rate
        # STFT-style constants; currently unread by the extractor below,
        # kept for a future mel-spectrogram upgrade.
        self._mel_bands = 80
        self._hop_length = 160  # 10ms at 16kHz
        self._win_length = 400  # 25ms at 16kHz

        # Viseme mapping: phoneme categories to mouth shape parameters
        # Each viseme defines: mouth_open, mouth_width, lip_round
        self._energy_history: list[float] = []  # recent per-chunk energies
        self._pitch_history: list[float] = []   # recent per-chunk pitches
        self._smoothing_alpha = 0.3             # reserved; smoothing uses moving averages

    def extract_features(self, audio_chunk: np.ndarray) -> dict:
        """
        Extract audio features from a PCM audio chunk.

        Returns dict with:
            energy: float 0.0-1.0 (volume/loudness, smoothed over ~5 chunks)
            pitch: float 0.0-1.0 (normalized fundamental frequency)
            spectral_centroid: float 0.0-1.0 (brightness of sound)
            is_voiced: bool (speech detected vs silence)
        """
        if len(audio_chunk) == 0:
            return {
                "energy": 0.0,
                "pitch": 0.0,
                "spectral_centroid": 0.0,
                "is_voiced": False,
            }

        # Normalize audio. Use the absolute peak: int16-scale audio whose
        # samples are predominantly negative has max() <= 1.0 and would
        # otherwise skip normalization and saturate the energy estimate.
        audio = audio_chunk.astype(np.float32)
        if np.abs(audio).max() > 1.0:
            audio = audio / 32768.0

        # RMS energy, scaled so normal speech lands roughly in 0.1-1.0
        rms = np.sqrt(np.mean(audio ** 2))
        energy = np.clip(rms * 10.0, 0.0, 1.0)

        # Smooth energy with a short moving average
        self._energy_history.append(energy)
        if len(self._energy_history) > 10:
            self._energy_history.pop(0)
        smoothed_energy = np.mean(self._energy_history[-5:])

        # Pitch estimation via autocorrelation
        pitch = self._estimate_pitch(audio)

        # Smooth pitch over the last few chunks
        self._pitch_history.append(pitch)
        if len(self._pitch_history) > 10:
            self._pitch_history.pop(0)
        smoothed_pitch = np.mean(self._pitch_history[-3:])

        # Spectral centroid (brightness), normalized against 4kHz
        spectrum = np.abs(np.fft.rfft(audio))
        freqs = np.fft.rfftfreq(len(audio), 1.0 / self.sample_rate)
        if spectrum.sum() > 0:
            centroid = np.sum(freqs * spectrum) / np.sum(spectrum)
            centroid_norm = np.clip(centroid / 4000.0, 0.0, 1.0)
        else:
            centroid_norm = 0.0

        # Simple energy-gate voice activity detection
        is_voiced = smoothed_energy > 0.02

        return {
            "energy": float(smoothed_energy),
            "pitch": float(smoothed_pitch),
            "spectral_centroid": float(centroid_norm),
            "is_voiced": bool(is_voiced),
        }

    def _estimate_pitch(self, audio: np.ndarray) -> float:
        """Estimate pitch via autocorrelation method.

        Returns:
            Normalized pitch in [0, 1] (80Hz -> 0.0, 400Hz -> 1.0), or 0.0
            when the chunk is too short or no confident periodicity exists.
        """
        if len(audio) < 512:
            return 0.0

        # Autocorrelation of (up to) the first 1024 samples; keep lags >= 0
        corr = np.correlate(audio[:1024], audio[:1024], mode="full")
        corr = corr[len(corr) // 2:]

        # Search lags for F0 between 50Hz and 400Hz:
        # highest pitch 400Hz -> shortest lag ~40 samples at 16kHz
        # lowest pitch 50Hz -> longest lag ~320 samples at 16kHz
        min_lag = self.sample_rate // 400  # ~40
        max_lag = self.sample_rate // 50  # ~320

        if max_lag >= len(corr):
            max_lag = len(corr) - 1
        if min_lag >= max_lag:
            return 0.0

        search_region = corr[min_lag:max_lag]
        if len(search_region) == 0:
            return 0.0

        peak_idx = np.argmax(search_region) + min_lag

        # Accept the peak only if it retains >= 30% of the zero-lag energy
        if corr[0] > 0 and corr[peak_idx] / corr[0] > 0.3:
            freq = self.sample_rate / peak_idx
            # Normalize to 0-1 range (80Hz = 0.0, 400Hz = 1.0)
            pitch_norm = np.clip((freq - 80) / 320, 0.0, 1.0)
            return float(pitch_norm)

        return 0.0

    def features_to_keypoint_delta(self, features: dict) -> dict:
        """
        Convert extracted audio features to animation parameters.

        Returns dict compatible with LivePortraitDriver.apply_audio_keypoints().
        The schema is identical for voiced and unvoiced chunks so downstream
        consumers can rely on all three keys being present.
        """
        if not features.get("is_voiced", False):
            return {"energy": 0.0, "pitch": 0.0, "spectral_centroid": 0.0}

        return {
            "energy": features["energy"],
            "pitch": features["pitch"],
            "spectral_centroid": features.get("spectral_centroid", 0.0),
        }

    def process_audio_chunk(self, audio_data: np.ndarray) -> dict:
        """
        Full pipeline: audio chunk → features → keypoint-ready parameters.
        """
        features = self.extract_features(audio_data)
        return self.features_to_keypoint_delta(features)

    def reset(self) -> None:
        """Reset smoothing state for a new conversation turn."""
        self._energy_history.clear()
        self._pitch_history.clear()
eden_os/animator/eden_temporal_anchor.py
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS — Eden Temporal Anchor
|
| 3 |
+
Implements temporal consistency system adapted from LONGLIVE's frame sink concept.
|
| 4 |
+
Prevents identity drift over long conversations by maintaining anchor frames.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import time
|
| 8 |
+
from typing import Optional
|
| 9 |
+
|
| 10 |
+
import cv2
|
| 11 |
+
import numpy as np
|
| 12 |
+
from loguru import logger
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class EdenTemporalAnchor:
    """
    Maintains identity consistency over long conversations.

    Adapted from LONGLIVE (arXiv:2509.22622):
    - Keeps the first frame of each conversation as a global anchor
    - Periodically refreshes anchor to prevent staleness
    - Blends current frames with anchor to maintain identity

    Frames are expected in RGB channel order (all cv2 conversions below use
    RGB2LAB / LAB2RGB codes).
    """

    def __init__(self, refresh_interval: int = 5, min_anchor_weight: float = 0.1):
        """
        Args:
            refresh_interval: Refresh anchor every N conversation turns
            min_anchor_weight: Minimum blend weight for anchor (never fully ignored)
        """
        self.refresh_interval = refresh_interval
        self.min_anchor_weight = min_anchor_weight

        self._global_anchor: Optional[np.ndarray] = None     # anchor frame (RGB)
        self._anchor_features: Optional[np.ndarray] = None   # identity feature vector of the anchor
        self._turn_count: int = 0                            # last turn seen by stabilize_frame()
        self._decay_rate: float = 0.02  # Weight decay per turn
        self._creation_time: float = 0.0                     # monotonic time when anchor was set

    def set_anchor(self, frame: np.ndarray) -> None:
        """Set the global identity anchor frame and cache its features."""
        self._global_anchor = frame.copy()
        self._anchor_features = self._extract_identity_features(frame)
        self._creation_time = time.monotonic()
        self._turn_count = 0
        logger.info("Global identity anchor set")

    def _extract_identity_features(self, frame: np.ndarray) -> np.ndarray:
        """
        Extract identity-critical features from a frame.
        Focuses on skin texture, facial proportions, and color profile.

        Returns:
            1-D float32 vector: three 32-bin LAB histograms (normalized)
            concatenated with a 16x16 grayscale layout thumbnail (352 values).
        """
        # Resize to standard analysis size
        analysis = cv2.resize(frame, (128, 128))

        # Convert to LAB for perceptually-uniform color analysis
        lab = cv2.cvtColor(analysis, cv2.COLOR_RGB2LAB).astype(np.float32)

        # Extract features:
        # 1. Color histogram (identity-linked skin tone)
        l_hist = np.histogram(lab[:, :, 0], bins=32, range=(0, 255))[0].astype(np.float32)
        a_hist = np.histogram(lab[:, :, 1], bins=32, range=(0, 255))[0].astype(np.float32)
        b_hist = np.histogram(lab[:, :, 2], bins=32, range=(0, 255))[0].astype(np.float32)

        # Normalize histograms (epsilon avoids division by zero on blank frames)
        l_hist /= l_hist.sum() + 1e-8
        a_hist /= a_hist.sum() + 1e-8
        b_hist /= b_hist.sum() + 1e-8

        # 2. Spatial structure (downsampled face layout)
        gray = cv2.cvtColor(analysis, cv2.COLOR_RGB2GRAY)
        structure = cv2.resize(gray, (16, 16)).flatten().astype(np.float32) / 255.0

        # Combine into feature vector
        features = np.concatenate([l_hist, a_hist, b_hist, structure])
        return features

    def compute_drift(self, current_frame: np.ndarray) -> float:
        """
        Compute identity drift between current frame and anchor.
        Returns drift score (0.0 = identical, higher = more drift);
        0.0 when no anchor has been set yet.
        """
        if self._anchor_features is None:
            return 0.0

        # RMS distance between feature vectors
        current_features = self._extract_identity_features(current_frame)
        drift = np.sqrt(np.mean((current_features - self._anchor_features) ** 2))
        return float(drift)

    def stabilize_frame(
        self, current_frame: np.ndarray, conversation_turn: int
    ) -> np.ndarray:
        """
        Apply temporal anchoring to stabilize a frame's identity.

        Blends the current frame with the anchor based on decay weight.
        This prevents gradual identity drift over long conversations.
        Returns the frame unchanged when no anchor exists or drift is small.
        """
        if self._global_anchor is None:
            return current_frame

        self._turn_count = conversation_turn

        # Refresh anchor periodically.
        # NOTE(review): this condition holds for EVERY frame rendered during a
        # qualifying turn, so the anchor is re-blended repeatedly within that
        # turn — confirm whether a once-per-turn refresh was intended.
        if conversation_turn > 0 and conversation_turn % self.refresh_interval == 0:
            self._refresh_anchor(current_frame)

        # Compute anchor weight (decays over time but never reaches zero)
        anchor_weight = max(
            self.min_anchor_weight,
            1.0 - (conversation_turn * self._decay_rate),
        )

        # Only blend if drift is detected (threshold is in feature-RMS units)
        drift = self.compute_drift(current_frame)
        if drift < 0.05:
            return current_frame  # No significant drift

        # Blend in LAB space for perceptual smoothness
        anchor_resized = cv2.resize(self._global_anchor, (current_frame.shape[1], current_frame.shape[0]))

        current_lab = cv2.cvtColor(current_frame, cv2.COLOR_RGB2LAB).astype(np.float32)
        anchor_lab = cv2.cvtColor(anchor_resized, cv2.COLOR_RGB2LAB).astype(np.float32)

        # Blend color channels (preserve structure from current, tone from anchor)
        blended_lab = current_lab.copy()
        # Only blend color channels (a, b), keep lightness from current frame
        blend_strength = anchor_weight * 0.3  # Subtle color correction
        blended_lab[:, :, 1] = (
            current_lab[:, :, 1] * (1 - blend_strength)
            + anchor_lab[:, :, 1] * blend_strength
        )
        blended_lab[:, :, 2] = (
            current_lab[:, :, 2] * (1 - blend_strength)
            + anchor_lab[:, :, 2] * blend_strength
        )

        blended = cv2.cvtColor(
            np.clip(blended_lab, 0, 255).astype(np.uint8), cv2.COLOR_LAB2RGB
        )

        return blended

    def _refresh_anchor(self, current_frame: np.ndarray) -> None:
        """Refresh anchor with current frame (partial update)."""
        if self._global_anchor is None:
            self.set_anchor(current_frame)
            return

        # Blend old anchor with current frame (70% old, 30% new)
        anchor_resized = cv2.resize(
            self._global_anchor, (current_frame.shape[1], current_frame.shape[0])
        )
        self._global_anchor = cv2.addWeighted(anchor_resized, 0.7, current_frame, 0.3, 0)
        self._anchor_features = self._extract_identity_features(self._global_anchor)
        logger.debug(f"Anchor refreshed at turn {self._turn_count}")

    def get_status(self) -> dict:
        """Get anchor status snapshot for monitoring."""
        return {
            "has_anchor": self._global_anchor is not None,
            "turn_count": self._turn_count,
            "uptime_seconds": time.monotonic() - self._creation_time if self._global_anchor is not None else 0,
            "current_weight": max(
                self.min_anchor_weight,
                1.0 - (self._turn_count * self._decay_rate),
            ),
        }
|
eden_os/animator/idle_generator.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS — Idle Animation Generator
|
| 3 |
+
Generates continuous LISTENING state idle loop with natural blinks,
|
| 4 |
+
micro head movements, breathing, and occasional eyebrow raises.
|
| 5 |
+
The avatar is NEVER frozen.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import asyncio
|
| 9 |
+
import time
|
| 10 |
+
from typing import AsyncIterator, Optional
|
| 11 |
+
|
| 12 |
+
import numpy as np
|
| 13 |
+
from loguru import logger
|
| 14 |
+
|
| 15 |
+
from eden_os.shared.types import AvatarState, VideoFrame
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class IdleGenerator:
    """Produces the avatar's continuous idle motion.

    Combines four layered behaviors so the avatar never looks frozen:
    randomly scheduled blinks, sinusoidal breathing, Lissajous-style head
    micro-sway, and occasional eyebrow micro-raises.
    """

    def __init__(self, fps: float = 30.0):
        self.fps = fps
        self.frame_interval = 1.0 / fps
        self._running = False
        self._start_time: float = 0.0
        self._frame_count: int = 0

        # Blink state machine: progress runs 0->1 over _blink_duration.
        self._next_blink_time: float = 0.0
        self._blink_duration: float = 0.15  # seconds
        self._blink_progress: float = 0.0
        self._is_blinking: bool = False

        # Breathing: one full sinusoidal cycle per _breath_cycle seconds.
        self._breath_cycle: float = 4.0  # seconds per breath
        self._breath_amplitude: float = 0.005

        # Head micro-sway.
        self._head_sway_freq: float = 0.3  # Hz
        self._head_sway_amp: float = 0.01  # radians equivalent

        # Eyebrow micro-raise state machine.
        self._next_brow_time: float = 0.0
        self._brow_raise_duration: float = 0.5
        self._brow_progress: float = 0.0
        self._is_brow_raising: bool = False

    def _schedule_next_blink(self, current_time: float) -> None:
        """Pick the next blink time, 3-7 seconds from ``current_time``."""
        self._next_blink_time = current_time + np.random.uniform(3.0, 7.0)

    def _schedule_next_brow_raise(self, current_time: float) -> None:
        """Pick the next eyebrow micro-raise, 8-15 seconds from ``current_time``."""
        self._next_brow_time = current_time + np.random.uniform(8.0, 15.0)

    def get_idle_keypoint_deltas(self, elapsed: float) -> dict:
        """
        Compute per-feature animation deltas at ``elapsed`` seconds.

        Advances the blink/brow state machines by one frame per call, so it
        is intended to be invoked once per rendered frame. Returns a dict
        with keys: eye_blink, brow_raise, head_x, head_y, head_z, breath_y,
        mouth_tension.
        """
        two_pi = 2.0 * np.pi
        sway_amp = self._head_sway_amp
        sway_freq = self._head_sway_freq

        # Breathing: pure sinusoid over the breath cycle.
        breath_phase = (elapsed % self._breath_cycle) / self._breath_cycle
        breath_y = float(np.sin(breath_phase * two_pi) * self._breath_amplitude)

        # Head micro-sway: three incommensurate sinusoids (Lissajous-like).
        head_x = float(np.sin(elapsed * sway_freq * two_pi) * sway_amp)
        head_y = float(
            np.sin(elapsed * sway_freq * 1.3 * two_pi + 0.7) * sway_amp * 0.6
        )
        head_z = float(
            np.sin(elapsed * sway_freq * 0.7 * two_pi + 1.4) * sway_amp * 0.3
        )

        # Blink: trigger when due, then advance one frame's worth of progress.
        if not self._is_blinking and elapsed >= self._next_blink_time:
            self._is_blinking = True
            self._blink_progress = 0.0

        eye_blink = 0.0
        if self._is_blinking:
            self._blink_progress += self.frame_interval / self._blink_duration
            if self._blink_progress >= 1.0:
                # Blink finished: reset and schedule the next one.
                self._is_blinking = False
                self._blink_progress = 0.0
                self._schedule_next_blink(elapsed)
            elif self._blink_progress < 0.3:
                # Fast close phase.
                eye_blink = float(self._blink_progress / 0.3)
            else:
                # Slower open phase.
                eye_blink = float(1.0 - (self._blink_progress - 0.3) / 0.7)

        # Eyebrow micro-raise: same trigger/advance pattern as blinking.
        if not self._is_brow_raising and elapsed >= self._next_brow_time:
            self._is_brow_raising = True
            self._brow_progress = 0.0

        brow_raise = 0.0
        if self._is_brow_raising:
            self._brow_progress += self.frame_interval / self._brow_raise_duration
            if self._brow_progress >= 1.0:
                self._is_brow_raising = False
                self._brow_progress = 0.0
                self._schedule_next_brow_raise(elapsed)
            else:
                # Smooth half-sine envelope up and back down.
                brow_raise = float(np.sin(self._brow_progress * np.pi) * 0.3)

        # Subtle mouth tension micro-expression (slow, tiny sinusoid).
        mouth_tension = float(np.sin(elapsed * 0.15 * two_pi) * 0.002)

        return {
            "eye_blink": eye_blink,
            "brow_raise": brow_raise,
            "head_x": head_x,
            "head_y": head_y,
            "head_z": head_z,
            "breath_y": breath_y,
            "mouth_tension": mouth_tension,
        }

    def apply_idle_to_keypoints(
        self, base_keypoints: np.ndarray, deltas: dict
    ) -> np.ndarray:
        """Return a copy of ``base_keypoints`` with idle deltas applied."""
        kp = base_keypoints.copy()

        # Blink: upper lids move down, lower lids move up.
        blink = deltas.get("eye_blink", 0.0)
        kp[6, 1] += blink * 0.02    # left eye top down
        kp[7, 1] -= blink * 0.015   # left eye bottom up
        kp[9, 1] += blink * 0.02    # right eye top down
        kp[10, 1] -= blink * 0.015  # right eye bottom up

        # Brow raise lifts both brows.
        brow = deltas.get("brow_raise", 0.0)
        kp[19, 1] -= brow * 0.02
        kp[20, 1] -= brow * 0.02

        # Head sway translates every keypoint.
        kp[:, 0] += deltas.get("head_x", 0.0)
        kp[:, 1] += deltas.get("head_y", 0.0)

        # Breathing shifts the lower-face keypoints vertically.
        kp[11:, 1] += deltas.get("breath_y", 0.0)

        # Mouth tension pulls the mouth corners apart horizontally.
        tension = deltas.get("mouth_tension", 0.0)
        kp[14, 0] -= tension
        kp[15, 0] += tension

        return kp

    async def generate_idle_frames(
        self,
        source_image: np.ndarray,
        base_keypoints: np.ndarray,
        render_fn,
    ) -> AsyncIterator[VideoFrame]:
        """
        Continuously yield idle animation frames at the target FPS.

        Args:
            source_image: The avatar portrait (kept for interface parity;
                rendering goes through ``render_fn``).
            base_keypoints: Neutral face keypoints.
            render_fn: Callable mapping keypoints to a rendered frame.
        """
        self._running = True
        self._start_time = time.monotonic()
        self._frame_count = 0
        self._schedule_next_blink(0.0)
        self._schedule_next_brow_raise(0.0)

        logger.info("Idle animation loop started")

        while self._running:
            tick = time.monotonic()
            elapsed = tick - self._start_time

            # Deltas -> keypoints -> pixels for this instant.
            animated = self.apply_idle_to_keypoints(
                base_keypoints, self.get_idle_keypoint_deltas(elapsed)
            )

            frame = VideoFrame(
                pixels=render_fn(animated),
                timestamp_ms=elapsed * 1000.0,
                state=AvatarState.LISTENING,
                eden_score=1.0,
            )

            self._frame_count += 1
            yield frame

            # Sleep off whatever remains of this frame's time budget.
            budget_left = self.frame_interval - (time.monotonic() - tick)
            if budget_left > 0:
                await asyncio.sleep(budget_left)

    def stop(self) -> None:
        """Stop the idle animation loop after the current frame."""
        self._running = False
        logger.info(f"Idle loop stopped after {self._frame_count} frames")
|
eden_os/animator/liveportrait_driver.py
ADDED
|
@@ -0,0 +1,278 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS — LivePortrait Driver
|
| 3 |
+
Wraps LivePortrait inference pipeline for real-time facial animation.
|
| 4 |
+
Accepts audio features and converts them to implicit keypoint deltas.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import asyncio
|
| 8 |
+
import time
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
from typing import AsyncIterator, Optional
|
| 11 |
+
|
| 12 |
+
import cv2
|
| 13 |
+
import numpy as np
|
| 14 |
+
from loguru import logger
|
| 15 |
+
|
| 16 |
+
from eden_os.shared.types import AudioChunk, AvatarState, VideoFrame
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class LivePortraitDriver:
    """Drives LivePortrait animation from audio features.

    When pretrained LivePortrait weights are not found under ``models_cache``,
    the driver runs a procedural fallback: a fixed set of 21 implicit face
    keypoints is modulated by audio energy/pitch (and an optional emotion
    dict), and the source portrait is deformed with Gaussian-weighted local
    displacements around each moved keypoint.
    """

    def __init__(self, models_cache: str = "models_cache/liveportrait"):
        # Location of (optional) pretrained LivePortrait weights.
        self.models_cache = Path(models_cache)
        self.is_loaded = False
        # 512x512 portrait set via set_source_image().
        self.source_image: Optional[np.ndarray] = None
        # Neutral keypoints derived from the source portrait.
        self.source_keypoints: Optional[np.ndarray] = None
        # Most recently animated keypoints (updated by apply_audio_keypoints).
        self.current_keypoints: Optional[np.ndarray] = None
        self.frame_count = 0
        self.fps = 30.0
        # Admin-panel sliders; see update_settings().
        self.expression_scale = 0.6
        self.gaze_lock = 0.5

        # Keypoint dimensions for implicit representation
        self._num_keypoints = 21
        self._keypoint_dim = 3  # x, y, z

    async def load_models(self) -> None:
        """Load LivePortrait model weights (or fall back to procedural mode)."""
        logger.info("Loading LivePortrait driver...")

        # Check for pretrained weights
        if self.models_cache.exists():
            logger.info(f"Models cache found at {self.models_cache}")
        else:
            logger.warning(
                f"LivePortrait weights not found at {self.models_cache}. "
                "Using procedural animation fallback."
            )

        self.is_loaded = True
        logger.info("LivePortrait driver ready (procedural mode)")

    def set_source_image(self, image: np.ndarray) -> None:
        """Set the source portrait for animation (resized to 512x512)."""
        self.source_image = cv2.resize(image, (512, 512))
        # Initialize neutral keypoints
        self.source_keypoints = self._extract_neutral_keypoints(self.source_image)
        self.current_keypoints = self.source_keypoints.copy()
        logger.info(f"Source image set: {image.shape}")

    def _extract_neutral_keypoints(self, image: np.ndarray) -> np.ndarray:
        """Build the canonical (21, 3) neutral keypoint set.

        Coordinates are normalized to roughly [-1, 1] with a small depth hint
        in the z column. Layout: 0-4 jaw, 5-7 left eye, 8-10 right eye,
        11-13 nose, 14-18 mouth, 19-20 eyebrows.

        Note: ``image`` is accepted for API symmetry only — the procedural
        fallback uses a fixed canonical layout and ignores pixel content.
        """
        keypoints = np.zeros((self._num_keypoints, self._keypoint_dim), dtype=np.float32)

        # Jaw line (0-4): arc across the lower face.
        for i in range(5):
            angle = np.pi * (0.3 + 0.4 * i / 4)
            keypoints[i] = [
                np.cos(angle) * 0.4,
                np.sin(angle) * 0.4 + 0.1,
                0.0,
            ]

        # Left eye (5-7)
        keypoints[5] = [-0.15, -0.1, 0.0]  # outer corner
        keypoints[6] = [-0.08, -0.12, 0.0]  # top
        keypoints[7] = [-0.08, -0.08, 0.0]  # bottom

        # Right eye (8-10)
        keypoints[8] = [0.15, -0.1, 0.0]
        keypoints[9] = [0.08, -0.12, 0.0]
        keypoints[10] = [0.08, -0.08, 0.0]

        # Nose (11-13)
        keypoints[11] = [0.0, -0.05, 0.02]
        keypoints[12] = [-0.03, 0.03, 0.01]
        keypoints[13] = [0.03, 0.03, 0.01]

        # Mouth (14-18)
        keypoints[14] = [-0.08, 0.12, 0.0]  # left corner
        keypoints[15] = [0.08, 0.12, 0.0]  # right corner
        keypoints[16] = [0.0, 0.10, 0.0]  # top lip
        keypoints[17] = [0.0, 0.14, 0.0]  # bottom lip
        keypoints[18] = [0.0, 0.12, 0.0]  # center

        # Eyebrows (19-20)
        keypoints[19] = [-0.12, -0.18, 0.0]  # left brow
        keypoints[20] = [0.12, -0.18, 0.0]  # right brow

        return keypoints

    def apply_audio_keypoints(
        self, audio_features: dict, emotion: Optional[dict] = None
    ) -> np.ndarray:
        """
        Convert audio features to keypoint deltas for lip retargeting.

        Args:
            audio_features: dict with keys 'energy', 'pitch' (missing keys
                default to 0.0); 'mfcc' is accepted but unused here.
            emotion: optional dict with 'joy' and 'confidence' in [0, 1]
                (0.5 is treated as neutral).

        Returns:
            Modified (21, 3) keypoints array; also cached on
            ``self.current_keypoints``.

        Raises:
            RuntimeError: if set_source_image() has not been called.
        """
        if self.source_keypoints is None:
            raise RuntimeError("Source image not set. Call set_source_image() first.")

        keypoints = self.source_keypoints.copy()
        energy = audio_features.get("energy", 0.0)
        pitch = audio_features.get("pitch", 0.0)
        scale = self.expression_scale

        # Lip retargeting based on audio energy (mouth opening capped at 0.15).
        mouth_open = np.clip(energy * 2.0 * scale, 0.0, 0.15)
        keypoints[17][1] += mouth_open  # bottom lip moves down
        keypoints[16][1] -= mouth_open * 0.3  # top lip rises slightly

        # Mouth width modulation based on pitch
        width_delta = np.clip(pitch * 0.5 * scale, -0.03, 0.03)
        keypoints[14][0] -= width_delta  # left corner
        keypoints[15][0] += width_delta  # right corner

        # Emotion-driven expressions (values above/below 0.5 push both ways)
        if emotion:
            joy = emotion.get("joy", 0.5)
            # Smile: pull mouth corners up and out
            smile = (joy - 0.5) * 0.06 * scale
            keypoints[14][1] -= smile
            keypoints[15][1] -= smile
            keypoints[14][0] -= smile * 0.5
            keypoints[15][0] += smile * 0.5

            # Brow raise for confidence/surprise
            confidence = emotion.get("confidence", 0.5)
            brow_raise = (confidence - 0.5) * 0.03 * scale
            keypoints[19][1] -= brow_raise
            keypoints[20][1] -= brow_raise

        self.current_keypoints = keypoints
        return keypoints

    def render_frame(self, keypoints: Optional[np.ndarray] = None) -> np.ndarray:
        """
        Render an animated frame using current keypoints.

        Falls back to ``current_keypoints`` (then ``source_keypoints``) when
        ``keypoints`` is None; returns a black 512x512 frame if no source
        image has been set (without counting it as a rendered frame).
        """
        if self.source_image is None:
            # Return black frame if no source
            return np.zeros((512, 512, 3), dtype=np.uint8)

        if keypoints is None:
            keypoints = self.current_keypoints if self.current_keypoints is not None else self.source_keypoints

        frame = self.source_image.copy()

        # Apply face warping based on keypoint deltas
        if self.source_keypoints is not None:
            delta = keypoints - self.source_keypoints
            frame = self._apply_face_warp(frame, delta)

        self.frame_count += 1
        return frame

    def _apply_face_warp(self, image: np.ndarray, delta: np.ndarray) -> np.ndarray:
        """
        Warp the face with Gaussian-weighted local displacements around each
        moved keypoint — a cheap stand-in for thin-plate-spline warping.

        Args:
            image: frame to deform (not modified in place).
            delta: (num_keypoints, 3) keypoint offsets in normalized coords.

        Returns:
            The warped image (or an unmodified copy when delta is ~zero).
        """
        h, w = image.shape[:2]
        result = image.copy()

        # No motion — skip the (comparatively expensive) remap entirely.
        if np.linalg.norm(delta) < 1e-5:
            return result

        # Identity sampling maps, built vectorised. (The previous version
        # filled these with a per-pixel Python double loop — O(h*w)
        # interpreter iterations per frame — and also computed an 8x8
        # control grid that was never used; both removed.)
        map_y, map_x = np.indices((h, w), dtype=np.float32)

        # Add a smooth displacement bump around each displaced keypoint.
        for k in range(min(len(delta), self._num_keypoints)):
            kp = self.source_keypoints[k]
            # Keypoint position in pixel space ([-1, 1] -> [0, w) / [0, h)).
            cx = int((kp[0] + 1) * w / 2)
            cy = int((kp[1] + 1) * h / 2)
            dx_px = delta[k][0] * w * 0.3
            dy_px = delta[k][1] * h * 0.3

            # Sub-tenth-of-a-pixel motion is invisible; skip it.
            if abs(dx_px) < 0.1 and abs(dy_px) < 0.1:
                continue

            # Gaussian influence radius, truncated at 3 sigma.
            sigma = w * 0.08
            y_start = max(0, cy - int(3 * sigma))
            y_end = min(h, cy + int(3 * sigma))
            x_start = max(0, cx - int(3 * sigma))
            x_end = min(w, cx + int(3 * sigma))
            if y_start >= y_end or x_start >= x_end:
                continue  # keypoint influence window lies off-image

            y_coords, x_coords = np.ogrid[y_start:y_end, x_start:x_end]
            gauss = np.exp(
                -((x_coords - cx) ** 2 + (y_coords - cy) ** 2) / (2 * sigma ** 2)
            )

            # Shift the sampling origin opposite to the delta so pixels
            # appear to move with the keypoint.
            map_x[y_start:y_end, x_start:x_end] -= (dx_px * gauss).astype(np.float32)
            map_y[y_start:y_end, x_start:x_end] -= (dy_px * gauss).astype(np.float32)

        result = cv2.remap(result, map_x, map_y, cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT)
        return result

    def update_settings(self, settings: dict) -> None:
        """Update driver settings from admin panel sliders.

        Recognized keys: 'expressiveness' (scales all expression deltas),
        'eye_contact' (gaze lock strength). Unknown keys are ignored.
        """
        if "expressiveness" in settings:
            self.expression_scale = float(settings["expressiveness"])
        if "eye_contact" in settings:
            self.gaze_lock = float(settings["eye_contact"])
|
eden_os/animator/state_machine.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS — Avatar State Machine
|
| 3 |
+
Manages transitions between LISTENING, THINKING, and SPEAKING states.
|
| 4 |
+
Implements the KV-Recache Interruption Protocol for seamless transitions.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import asyncio
|
| 8 |
+
import time
|
| 9 |
+
from typing import Callable, Optional
|
| 10 |
+
|
| 11 |
+
from loguru import logger
|
| 12 |
+
|
| 13 |
+
from eden_os.shared.types import AvatarState
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class AvatarStateMachine:
    """
    Manages the three states of avatar presence:
    LISTENING → THINKING → SPEAKING → LISTENING (loop)
    Plus interrupt handling for SPEAKING → LISTENING.

    Transition progress is derived from the monotonic clock, so callers can
    poll ``transition_progress`` / ``get_animation_blend()`` every frame
    without the machine ticking itself.
    """

    def __init__(self):
        # Current and previous states; both start at IDLE.
        self._state = AvatarState.IDLE
        self._previous_state = AvatarState.IDLE
        # Monotonic timestamp of the moment the current state was entered.
        self._state_enter_time: float = time.monotonic()
        self._transition_progress: float = 1.0  # 0.0 = just started, 1.0 = complete
        self._transition_duration: float = 0.2  # seconds for smooth transitions
        self._is_interrupted: bool = False

        # Callbacks for state transitions (one list per state, may be sync or async)
        self._on_enter_callbacks: dict[AvatarState, list[Callable]] = {
            s: [] for s in AvatarState
        }
        self._on_exit_callbacks: dict[AvatarState, list[Callable]] = {
            s: [] for s in AvatarState
        }

        # Transition parameters for animation blending
        self._transition_params: dict = {}

    @property
    def state(self) -> AvatarState:
        # Current avatar state.
        return self._state

    @property
    def previous_state(self) -> AvatarState:
        # State most recently transitioned out of.
        return self._previous_state

    @property
    def time_in_state(self) -> float:
        """Seconds since entering current state."""
        return time.monotonic() - self._state_enter_time

    @property
    def transition_progress(self) -> float:
        """Progress of current transition (0.0 to 1.0)."""
        # NOTE(review): _transition_progress is only a "complete" latch — it
        # is reset to 0.0 when a transition begins and never written back to
        # 1.0; actual progress is always recomputed from the wall clock below.
        if self._transition_progress >= 1.0:
            return 1.0
        elapsed = time.monotonic() - self._state_enter_time
        progress = min(1.0, elapsed / self._transition_duration)
        return progress

    @property
    def is_transitioning(self) -> bool:
        # True while the time-based blend toward the new state is incomplete.
        return self.transition_progress < 1.0

    def on_enter(self, state: AvatarState, callback: Callable) -> None:
        """Register callback for entering a state."""
        self._on_enter_callbacks[state].append(callback)

    def on_exit(self, state: AvatarState, callback: Callable) -> None:
        """Register callback for exiting a state."""
        self._on_exit_callbacks[state].append(callback)

    async def transition_to(self, new_state: AvatarState, interrupt: bool = False) -> None:
        """
        Transition to a new state with smooth blending.

        Exit callbacks for the old state fire before the state is updated;
        enter callbacks for the new state fire after. Callback exceptions
        are logged and swallowed so a failing callback cannot abort the
        state change.

        Args:
            new_state: Target state
            interrupt: If True, this is an interrupt transition (faster, uses KV-recache)
        """
        # Re-entering the same state is a no-op unless it is an interrupt.
        if new_state == self._state and not interrupt:
            return

        old_state = self._state
        self._previous_state = old_state
        self._is_interrupted = interrupt

        # Set transition duration based on type
        if interrupt:
            # KV-Recache: ultra-fast transition for interrupts
            self._transition_duration = 0.1  # 100ms per spec
            logger.info(f"INTERRUPT: {old_state.value} → {new_state.value} (KV-recache)")
        else:
            self._transition_duration = self._get_transition_duration(old_state, new_state)
            logger.info(f"State: {old_state.value} → {new_state.value}")

        # Set transition-specific animation parameters
        self._transition_params = self._get_transition_params(old_state, new_state, interrupt)

        # Fire exit callbacks
        for cb in self._on_exit_callbacks.get(old_state, []):
            try:
                result = cb()
                # Callbacks may be sync or async; await coroutine results.
                if asyncio.iscoroutine(result):
                    await result
            except Exception as e:
                logger.error(f"Exit callback error: {e}")

        # Update state
        self._state = new_state
        self._state_enter_time = time.monotonic()
        self._transition_progress = 0.0

        # Fire enter callbacks
        for cb in self._on_enter_callbacks.get(new_state, []):
            try:
                result = cb()
                if asyncio.iscoroutine(result):
                    await result
            except Exception as e:
                logger.error(f"Enter callback error: {e}")

    def _get_transition_duration(self, from_state: AvatarState, to_state: AvatarState) -> float:
        """Get transition duration based on state pair."""
        # Unlisted state pairs fall back to 0.3 s.
        transitions = {
            (AvatarState.LISTENING, AvatarState.THINKING): 0.3,
            (AvatarState.THINKING, AvatarState.SPEAKING): 0.2,
            (AvatarState.SPEAKING, AvatarState.LISTENING): 0.3,
            (AvatarState.IDLE, AvatarState.LISTENING): 0.5,
        }
        return transitions.get((from_state, to_state), 0.3)

    def _get_transition_params(
        self, from_state: AvatarState, to_state: AvatarState, interrupt: bool
    ) -> dict:
        """Get animation blending parameters for the transition.

        Returns a dict with keys blend_factor, brow_raise, inhale,
        mouth_close (plus preserve_anchors on interrupts).
        """
        params = {
            "blend_factor": 0.0,
            "brow_raise": 0.0,
            "inhale": False,
            "mouth_close": False,
        }

        if from_state == AvatarState.LISTENING and to_state == AvatarState.THINKING:
            # Subtle inhale, slight brow raise
            params["inhale"] = True
            params["brow_raise"] = 0.3

        elif from_state == AvatarState.THINKING and to_state == AvatarState.SPEAKING:
            # Open mouth, begin lip-sync
            params["mouth_close"] = False

        elif to_state == AvatarState.LISTENING:
            # Close mouth, return to idle
            params["mouth_close"] = True

        if interrupt:
            # KV-Recache: preserve current face position anchors
            params["preserve_anchors"] = True

        return params

    def get_animation_blend(self) -> dict:
        """
        Get current animation blend parameters based on transition progress.
        Used by the animator to smoothly blend between states.
        """
        progress = self.transition_progress
        # Copy so callers cannot mutate the stored transition params.
        params = self._transition_params.copy()
        params["blend_factor"] = progress
        params["state"] = self._state
        params["previous_state"] = self._previous_state
        params["is_transitioning"] = self.is_transitioning
        return params

    def get_state_info(self) -> dict:
        """Get current state information for API/frontend."""
        return {
            "state": self._state.value,
            "previous_state": self._previous_state.value,
            "time_in_state": self.time_in_state,
            "is_transitioning": self.is_transitioning,
            "transition_progress": self.transition_progress,
            "is_interrupted": self._is_interrupted,
        }
|
eden_os/brain/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS -- Brain Engine (Agent 4)
|
| 3 |
+
LLM Reasoning + Context Engine.
|
| 4 |
+
|
| 5 |
+
Exports:
|
| 6 |
+
BrainEngine — implements IBrainEngine interface.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from eden_os.brain.brain_engine import BrainEngine
|
| 10 |
+
|
| 11 |
+
__all__ = ["BrainEngine"]
|
eden_os/brain/brain_engine.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS -- Brain Engine: Main Engine Class
|
| 3 |
+
Implements IBrainEngine interface from eden_os.shared.interfaces.
|
| 4 |
+
Orchestrates reasoning, persona, memory, streaming bridge, and templates.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
from typing import AsyncIterator
|
| 10 |
+
|
| 11 |
+
from loguru import logger
|
| 12 |
+
|
| 13 |
+
from eden_os.shared.interfaces import IBrainEngine
|
| 14 |
+
from eden_os.shared.types import TextChunk
|
| 15 |
+
from eden_os.brain.reasoning_engine import ReasoningEngine
|
| 16 |
+
from eden_os.brain.persona_manager import PersonaManager
|
| 17 |
+
from eden_os.brain.memory_manager import MemoryManager
|
| 18 |
+
from eden_os.brain.streaming_bridge import StreamingBridge
|
| 19 |
+
from eden_os.brain.template_loader import TemplateLoader
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class BrainEngine(IBrainEngine):
    """
    Agent 4: LLM Reasoning + Context Engine.

    Wires together persona management, sliding-window conversation memory,
    the streaming LLM client, and the bridge that groups raw tokens into
    speech-ready chunks.

    Usage::

        brain = BrainEngine()
        await brain.load_persona("medical_office")

        async for chunk in brain.reason_stream("Hello!", {}):
            print(chunk.text, chunk.is_sentence_end, chunk.emotion)
    """

    def __init__(
        self,
        model: str = "claude-sonnet-4-20250514",
        max_tokens: int = 4096,
        templates_dir: str | None = None,
    ) -> None:
        self._persona = PersonaManager()
        self._memory = MemoryManager()
        self._template_loader = TemplateLoader(templates_dir=templates_dir)
        self._reasoning = ReasoningEngine(model=model, max_tokens=max_tokens)
        self._bridge = StreamingBridge()
        logger.info("BrainEngine initialised")

    # ------------------------------------------------------------------
    # IBrainEngine interface
    # ------------------------------------------------------------------

    async def reason_stream(
        self, user_input: str, context: dict
    ) -> AsyncIterator[TextChunk]:
        """
        Stream the LLM's reply as speech-ready TextChunk objects.

        Records the user message, assembles the system prompt, streams raw
        tokens from the reasoning engine, and routes them through the
        streaming bridge for sentence-boundary buffering and sentiment
        analysis. The complete reply is written back to memory at the end.

        Args:
            user_input: The user's message text.
            context: Additional context dict (currently unused; reserved for
                Scholar RAG injection).

        Yields:
            TextChunk objects at natural speech boundaries.
        """
        # Record the incoming message first so the memory window stays ordered.
        self._memory.add_user_message(user_input)

        prompt = self._build_system_prompt(context)

        # ReasoningEngine.stream_response appends user_input itself, so the
        # just-recorded user turn must not also appear in the history we pass.
        prior_turns = self._memory.get_history_for_llm()
        if prior_turns and prior_turns[-1]["role"] == "user":
            prior_turns = prior_turns[:-1]

        llm_tokens = self._reasoning.stream_response(
            user_input=user_input,
            system_prompt=prompt,
            conversation_history=prior_turns if prior_turns else None,
        )

        collected: list[str] = []
        async for chunk in self._bridge.bridge(llm_tokens):
            collected.append(chunk.text)
            yield chunk

        # Persist the assistant's complete reply (skip empty streams).
        reply = " ".join(collected)
        if reply.strip():
            self._memory.add_assistant_message(reply)

    async def load_persona(self, template_path: str) -> None:
        """
        Load agent persona from a YAML template.

        Args:
            template_path: Template name (e.g. ``"medical_office"``) or
                full path to a YAML file.
        """
        try:
            persona_cfg = self._template_loader.load(template_path)
        except FileNotFoundError:
            # Fall back to treating the argument as a filesystem path.
            persona_cfg = self._template_loader.load_from_path(template_path)

        await self._persona.load(persona_cfg)

        # Rebuild the bridge so chunk sentiment starts from the persona's
        # emotional baseline.
        self._bridge = StreamingBridge(
            emotion_baseline=self._persona.emotional_baseline
        )
        logger.info("Persona '{}' loaded into BrainEngine", self._persona.name)

    async def get_context(self) -> dict:
        """
        Get current conversation context and memory.

        Returns:
            Dict with keys: conversation_history, key_facts,
            turn_count, window_size, persona.
        """
        snapshot = self._memory.get_context()
        snapshot["persona"] = {
            "name": self._persona.name,
            "role": self._persona.role,
            "loaded": self._persona.is_loaded,
        }
        return snapshot

    async def process_user_input(self, text: str) -> None:
        """
        Record user input in conversation history without triggering LLM
        reasoning (e.g., during ASR partial transcripts).
        """
        self._memory.add_user_message(text)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _build_system_prompt(self, context: dict) -> str:
        """
        Assemble the full system prompt from persona + memory facts + context.
        """
        sections: list[str] = []

        # Persona prompt, or a generic default when none has been loaded.
        if self._persona.is_loaded:
            sections.append(self._persona.system_prompt)
        else:
            sections.append(
                "You are EVE, a friendly and helpful conversational AI assistant "
                "built by EDEN OS. You are warm, clear, and professional. "
                "Respond naturally and conversationally."
            )

        # Key facts extracted from memory.
        facts = self._memory.get_facts_prompt_section()
        if facts:
            sections.append(facts)

        # Knowledge injected by the Scholar RAG pipeline, if any.
        knowledge = context.get("knowledge_context")
        if knowledge:
            sections.append(
                "\n\n<knowledge_context>\n"
                + str(knowledge)
                + "\n</knowledge_context>"
            )

        return "\n".join(sections)
|
eden_os/brain/memory_manager.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS -- Brain Engine: Memory Manager
|
| 3 |
+
Sliding-window conversation history (last 20 turns).
|
| 4 |
+
Stores user/assistant messages. Extracts and stores key facts.
|
| 5 |
+
Returns formatted context for LLM.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import re
|
| 11 |
+
from collections import deque
|
| 12 |
+
from dataclasses import dataclass, field
|
| 13 |
+
from typing import Any
|
| 14 |
+
|
| 15 |
+
from loguru import logger
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
_MAX_TURNS = 20
|
| 19 |
+
|
| 20 |
+
# Simple patterns for extracting key facts from user messages
|
| 21 |
+
_FACT_PATTERNS = [
|
| 22 |
+
# "My name is X" / "I'm X" / "I am X"
|
| 23 |
+
re.compile(r"\bmy name is (\w[\w\s]{0,30})", re.IGNORECASE),
|
| 24 |
+
re.compile(r"\bi(?:'m| am) (\w[\w\s]{0,30})", re.IGNORECASE),
|
| 25 |
+
# "I have X" / "I work at X" / "I live in X"
|
| 26 |
+
re.compile(r"\bi (?:have|work at|live in|am from) (.{3,50})", re.IGNORECASE),
|
| 27 |
+
# "My X is Y"
|
| 28 |
+
re.compile(r"\bmy (\w+) is (.{2,40})", re.IGNORECASE),
|
| 29 |
+
# Age
|
| 30 |
+
re.compile(r"\bi(?:'m| am) (\d{1,3}) years old", re.IGNORECASE),
|
| 31 |
+
# Email
|
| 32 |
+
re.compile(r"[\w.+-]+@[\w-]+\.[\w.]+"),
|
| 33 |
+
# Phone
|
| 34 |
+
re.compile(r"\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b"),
|
| 35 |
+
]
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
@dataclass
|
| 39 |
+
class _Turn:
|
| 40 |
+
"""A single conversation turn."""
|
| 41 |
+
role: str # "user" or "assistant"
|
| 42 |
+
content: str
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
class MemoryManager:
|
| 46 |
+
"""
|
| 47 |
+
Manages conversation history with a sliding window and key-fact extraction.
|
| 48 |
+
|
| 49 |
+
Keeps the most recent ``max_turns`` exchanges (user + assistant pairs)
|
| 50 |
+
and a set of extracted key facts that persist for the session.
|
| 51 |
+
"""
|
| 52 |
+
|
| 53 |
+
def __init__(self, max_turns: int = _MAX_TURNS) -> None:
|
| 54 |
+
self.max_turns = max_turns
|
| 55 |
+
self._history: deque[_Turn] = deque(maxlen=max_turns * 2)
|
| 56 |
+
self._key_facts: list[str] = []
|
| 57 |
+
self._turn_count = 0
|
| 58 |
+
|
| 59 |
+
# ------------------------------------------------------------------
|
| 60 |
+
# Public API
|
| 61 |
+
# ------------------------------------------------------------------
|
| 62 |
+
|
| 63 |
+
def add_user_message(self, text: str) -> None:
|
| 64 |
+
"""Store a user message and extract any key facts."""
|
| 65 |
+
text = text.strip()
|
| 66 |
+
if not text:
|
| 67 |
+
return
|
| 68 |
+
self._history.append(_Turn(role="user", content=text))
|
| 69 |
+
self._extract_facts(text)
|
| 70 |
+
self._turn_count += 1
|
| 71 |
+
logger.debug("User message stored (turn {}): {}...", self._turn_count, text[:60])
|
| 72 |
+
|
| 73 |
+
def add_assistant_message(self, text: str) -> None:
|
| 74 |
+
"""Store an assistant (avatar) response."""
|
| 75 |
+
text = text.strip()
|
| 76 |
+
if not text:
|
| 77 |
+
return
|
| 78 |
+
self._history.append(_Turn(role="assistant", content=text))
|
| 79 |
+
logger.debug("Assistant message stored: {}...", text[:60])
|
| 80 |
+
|
| 81 |
+
def get_history_for_llm(self) -> list[dict[str, str]]:
|
| 82 |
+
"""
|
| 83 |
+
Return conversation history formatted for the Anthropic messages API.
|
| 84 |
+
|
| 85 |
+
Returns:
|
| 86 |
+
List of ``{"role": ..., "content": ...}`` dicts.
|
| 87 |
+
"""
|
| 88 |
+
return [{"role": t.role, "content": t.content} for t in self._history]
|
| 89 |
+
|
| 90 |
+
def get_context(self) -> dict[str, Any]:
|
| 91 |
+
"""
|
| 92 |
+
Return the full context dict for the Brain Engine.
|
| 93 |
+
|
| 94 |
+
Includes conversation history, key facts, and metadata.
|
| 95 |
+
"""
|
| 96 |
+
return {
|
| 97 |
+
"conversation_history": self.get_history_for_llm(),
|
| 98 |
+
"key_facts": list(self._key_facts),
|
| 99 |
+
"turn_count": self._turn_count,
|
| 100 |
+
"window_size": self.max_turns,
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
+
def get_key_facts(self) -> list[str]:
|
| 104 |
+
"""Return extracted key facts."""
|
| 105 |
+
return list(self._key_facts)
|
| 106 |
+
|
| 107 |
+
def get_facts_prompt_section(self) -> str:
|
| 108 |
+
"""
|
| 109 |
+
Format key facts as a prompt section to inject into the system prompt.
|
| 110 |
+
|
| 111 |
+
Returns:
|
| 112 |
+
A formatted string, or empty string if no facts.
|
| 113 |
+
"""
|
| 114 |
+
if not self._key_facts:
|
| 115 |
+
return ""
|
| 116 |
+
facts_str = "\n".join(f"- {f}" for f in self._key_facts)
|
| 117 |
+
return (
|
| 118 |
+
"\n\n<user_context>\n"
|
| 119 |
+
"Key facts about the current user gathered from this conversation:\n"
|
| 120 |
+
f"{facts_str}\n"
|
| 121 |
+
"</user_context>"
|
| 122 |
+
)
|
| 123 |
+
|
| 124 |
+
def clear(self) -> None:
|
| 125 |
+
"""Clear all history and facts."""
|
| 126 |
+
self._history.clear()
|
| 127 |
+
self._key_facts.clear()
|
| 128 |
+
self._turn_count = 0
|
| 129 |
+
logger.info("Memory cleared")
|
| 130 |
+
|
| 131 |
+
@property
|
| 132 |
+
def turn_count(self) -> int:
|
| 133 |
+
return self._turn_count
|
| 134 |
+
|
| 135 |
+
# ------------------------------------------------------------------
|
| 136 |
+
# Fact extraction
|
| 137 |
+
# ------------------------------------------------------------------
|
| 138 |
+
|
| 139 |
+
def _extract_facts(self, text: str) -> None:
|
| 140 |
+
"""Extract key facts from user text using pattern matching."""
|
| 141 |
+
for pattern in _FACT_PATTERNS:
|
| 142 |
+
matches = pattern.findall(text)
|
| 143 |
+
for match in matches:
|
| 144 |
+
if isinstance(match, tuple):
|
| 145 |
+
fact = " ".join(part.strip() for part in match if part.strip())
|
| 146 |
+
else:
|
| 147 |
+
fact = match.strip()
|
| 148 |
+
|
| 149 |
+
# Deduplicate (case-insensitive)
|
| 150 |
+
if fact and not any(
|
| 151 |
+
fact.lower() == existing.lower() for existing in self._key_facts
|
| 152 |
+
):
|
| 153 |
+
self._key_facts.append(fact)
|
| 154 |
+
logger.info("Key fact extracted: '{}'", fact)
|
eden_os/brain/persona_manager.py
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS -- Brain Engine: Persona Manager
|
| 3 |
+
Loads YAML persona templates. Returns system prompt, emotional baseline,
|
| 4 |
+
voice config, and appearance config. Validates template schema.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
from typing import Any
|
| 11 |
+
|
| 12 |
+
from loguru import logger
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# Required keys at each level of the template schema
|
| 16 |
+
_REQUIRED_AGENT_KEYS = {"name", "role", "persona", "system_prompt"}
|
| 17 |
+
_REQUIRED_PERSONA_KEYS = {"tone", "emotional_baseline"}
|
| 18 |
+
_DEFAULT_EMOTIONAL_BASELINE = {
|
| 19 |
+
"joy": 0.5,
|
| 20 |
+
"sadness": 0.0,
|
| 21 |
+
"confidence": 0.7,
|
| 22 |
+
"urgency": 0.0,
|
| 23 |
+
"warmth": 0.6,
|
| 24 |
+
}
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class PersonaValidationError(Exception):
|
| 28 |
+
"""Raised when a persona template fails schema validation."""
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
class PersonaManager:
|
| 32 |
+
"""Loads and serves persona configuration from a validated YAML template."""
|
| 33 |
+
|
| 34 |
+
def __init__(self) -> None:
|
| 35 |
+
self._config: dict[str, Any] = {}
|
| 36 |
+
self._system_prompt: str = ""
|
| 37 |
+
self._emotional_baseline: dict[str, float] = dict(_DEFAULT_EMOTIONAL_BASELINE)
|
| 38 |
+
self._voice_config: dict[str, Any] = {}
|
| 39 |
+
self._appearance_config: dict[str, Any] = {}
|
| 40 |
+
self._loaded = False
|
| 41 |
+
|
| 42 |
+
# ------------------------------------------------------------------
|
| 43 |
+
# Public API
|
| 44 |
+
# ------------------------------------------------------------------
|
| 45 |
+
|
| 46 |
+
async def load(self, template_config: dict[str, Any]) -> None:
|
| 47 |
+
"""
|
| 48 |
+
Load and validate a persona template config dict.
|
| 49 |
+
|
| 50 |
+
Args:
|
| 51 |
+
template_config: Parsed YAML dict with an ``agent`` top-level key.
|
| 52 |
+
|
| 53 |
+
Raises:
|
| 54 |
+
PersonaValidationError: If the template fails schema validation.
|
| 55 |
+
"""
|
| 56 |
+
self._validate(template_config)
|
| 57 |
+
agent = template_config["agent"]
|
| 58 |
+
|
| 59 |
+
self._config = template_config
|
| 60 |
+
self._system_prompt = agent["system_prompt"].strip()
|
| 61 |
+
|
| 62 |
+
persona = agent["persona"]
|
| 63 |
+
self._emotional_baseline = self._normalise_emotions(
|
| 64 |
+
persona.get("emotional_baseline", _DEFAULT_EMOTIONAL_BASELINE)
|
| 65 |
+
)
|
| 66 |
+
|
| 67 |
+
self._voice_config = agent.get("voice", {})
|
| 68 |
+
self._appearance_config = agent.get("appearance", {})
|
| 69 |
+
self._loaded = True
|
| 70 |
+
|
| 71 |
+
logger.info(
|
| 72 |
+
"Persona loaded: name='{}' role='{}'",
|
| 73 |
+
agent["name"],
|
| 74 |
+
agent["role"],
|
| 75 |
+
)
|
| 76 |
+
|
| 77 |
+
@property
|
| 78 |
+
def is_loaded(self) -> bool:
|
| 79 |
+
return self._loaded
|
| 80 |
+
|
| 81 |
+
@property
|
| 82 |
+
def system_prompt(self) -> str:
|
| 83 |
+
return self._system_prompt
|
| 84 |
+
|
| 85 |
+
@property
|
| 86 |
+
def emotional_baseline(self) -> dict[str, float]:
|
| 87 |
+
return dict(self._emotional_baseline)
|
| 88 |
+
|
| 89 |
+
@property
|
| 90 |
+
def voice_config(self) -> dict[str, Any]:
|
| 91 |
+
return dict(self._voice_config)
|
| 92 |
+
|
| 93 |
+
@property
|
| 94 |
+
def appearance_config(self) -> dict[str, Any]:
|
| 95 |
+
return dict(self._appearance_config)
|
| 96 |
+
|
| 97 |
+
@property
|
| 98 |
+
def name(self) -> str:
|
| 99 |
+
if not self._loaded:
|
| 100 |
+
return "EVE"
|
| 101 |
+
return self._config["agent"]["name"]
|
| 102 |
+
|
| 103 |
+
@property
|
| 104 |
+
def role(self) -> str:
|
| 105 |
+
if not self._loaded:
|
| 106 |
+
return "conversational assistant"
|
| 107 |
+
return self._config["agent"]["role"]
|
| 108 |
+
|
| 109 |
+
def get_full_config(self) -> dict[str, Any]:
|
| 110 |
+
"""Return the full validated config dict."""
|
| 111 |
+
return dict(self._config)
|
| 112 |
+
|
| 113 |
+
# ------------------------------------------------------------------
|
| 114 |
+
# Validation
|
| 115 |
+
# ------------------------------------------------------------------
|
| 116 |
+
|
| 117 |
+
@staticmethod
|
| 118 |
+
def _validate(config: dict[str, Any]) -> None:
|
| 119 |
+
"""Validate the template against the expected schema."""
|
| 120 |
+
if not isinstance(config, dict):
|
| 121 |
+
raise PersonaValidationError("Template must be a dict")
|
| 122 |
+
|
| 123 |
+
if "agent" not in config:
|
| 124 |
+
raise PersonaValidationError("Template missing top-level 'agent' key")
|
| 125 |
+
|
| 126 |
+
agent = config["agent"]
|
| 127 |
+
if not isinstance(agent, dict):
|
| 128 |
+
raise PersonaValidationError("'agent' must be a dict")
|
| 129 |
+
|
| 130 |
+
missing = _REQUIRED_AGENT_KEYS - set(agent.keys())
|
| 131 |
+
if missing:
|
| 132 |
+
raise PersonaValidationError(
|
| 133 |
+
f"Agent config missing required keys: {missing}"
|
| 134 |
+
)
|
| 135 |
+
|
| 136 |
+
persona = agent["persona"]
|
| 137 |
+
if not isinstance(persona, dict):
|
| 138 |
+
raise PersonaValidationError("'persona' must be a dict")
|
| 139 |
+
|
| 140 |
+
missing_persona = _REQUIRED_PERSONA_KEYS - set(persona.keys())
|
| 141 |
+
if missing_persona:
|
| 142 |
+
raise PersonaValidationError(
|
| 143 |
+
f"Persona config missing required keys: {missing_persona}"
|
| 144 |
+
)
|
| 145 |
+
|
| 146 |
+
# Validate emotional_baseline values are 0.0-1.0
|
| 147 |
+
baseline = persona.get("emotional_baseline", {})
|
| 148 |
+
if isinstance(baseline, dict):
|
| 149 |
+
for key, val in baseline.items():
|
| 150 |
+
if not isinstance(val, (int, float)):
|
| 151 |
+
raise PersonaValidationError(
|
| 152 |
+
f"emotional_baseline['{key}'] must be a number, got {type(val).__name__}"
|
| 153 |
+
)
|
| 154 |
+
if not 0.0 <= float(val) <= 1.0:
|
| 155 |
+
raise PersonaValidationError(
|
| 156 |
+
f"emotional_baseline['{key}'] must be 0.0-1.0, got {val}"
|
| 157 |
+
)
|
| 158 |
+
|
| 159 |
+
@staticmethod
|
| 160 |
+
def _normalise_emotions(raw: dict[str, Any]) -> dict[str, float]:
|
| 161 |
+
"""Ensure all five canonical emotion keys exist with float values."""
|
| 162 |
+
out = dict(_DEFAULT_EMOTIONAL_BASELINE)
|
| 163 |
+
for k, v in raw.items():
|
| 164 |
+
out[k] = float(v)
|
| 165 |
+
return out
|
eden_os/brain/reasoning_engine.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS -- Brain Engine: Reasoning Engine
|
| 3 |
+
LLM interface with streaming using Anthropic API (Claude claude-sonnet-4-20250514).
|
| 4 |
+
Falls back gracefully when no API key is available.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
from typing import AsyncIterator
|
| 9 |
+
|
| 10 |
+
from loguru import logger
|
| 11 |
+
|
| 12 |
+
from eden_os.shared.types import TextChunk
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# Default model
|
| 16 |
+
_DEFAULT_MODEL = "claude-sonnet-4-20250514"
|
| 17 |
+
_MAX_TOKENS = 4096
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class ReasoningEngine:
|
| 21 |
+
"""Streams LLM responses as TextChunk objects via Anthropic API."""
|
| 22 |
+
|
| 23 |
+
def __init__(self, model: str = _DEFAULT_MODEL, max_tokens: int = _MAX_TOKENS):
|
| 24 |
+
self.model = model
|
| 25 |
+
self.max_tokens = max_tokens
|
| 26 |
+
self._client = None
|
| 27 |
+
self._api_available = False
|
| 28 |
+
self._init_client()
|
| 29 |
+
|
| 30 |
+
def _init_client(self) -> None:
|
| 31 |
+
"""Initialize the Anthropic client if API key is present."""
|
| 32 |
+
api_key = os.environ.get("ANTHROPIC_API_KEY")
|
| 33 |
+
if not api_key:
|
| 34 |
+
logger.warning(
|
| 35 |
+
"ANTHROPIC_API_KEY not set. ReasoningEngine will use offline fallback."
|
| 36 |
+
)
|
| 37 |
+
self._api_available = False
|
| 38 |
+
return
|
| 39 |
+
try:
|
| 40 |
+
import anthropic
|
| 41 |
+
|
| 42 |
+
self._client = anthropic.AsyncAnthropic(api_key=api_key)
|
| 43 |
+
self._api_available = True
|
| 44 |
+
logger.info("ReasoningEngine initialized with Anthropic API (model={})", self.model)
|
| 45 |
+
except ImportError:
|
| 46 |
+
logger.error("anthropic package not installed. pip install anthropic")
|
| 47 |
+
self._api_available = False
|
| 48 |
+
except Exception as exc:
|
| 49 |
+
logger.error("Failed to initialize Anthropic client: {}", exc)
|
| 50 |
+
self._api_available = False
|
| 51 |
+
|
| 52 |
+
async def stream_response(
|
| 53 |
+
self,
|
| 54 |
+
user_input: str,
|
| 55 |
+
system_prompt: str = "",
|
| 56 |
+
conversation_history: list[dict] | None = None,
|
| 57 |
+
) -> AsyncIterator[str]:
|
| 58 |
+
"""
|
| 59 |
+
Async generator that yields raw text tokens from the LLM.
|
| 60 |
+
Falls back to a canned echo response when API is unavailable.
|
| 61 |
+
|
| 62 |
+
Args:
|
| 63 |
+
user_input: The current user message.
|
| 64 |
+
system_prompt: System prompt for persona injection.
|
| 65 |
+
conversation_history: List of {"role": ..., "content": ...} dicts.
|
| 66 |
+
|
| 67 |
+
Yields:
|
| 68 |
+
Raw text token strings as they arrive from the LLM.
|
| 69 |
+
"""
|
| 70 |
+
if not self._api_available or self._client is None:
|
| 71 |
+
async for token in self._offline_fallback(user_input):
|
| 72 |
+
yield token
|
| 73 |
+
return
|
| 74 |
+
|
| 75 |
+
messages = []
|
| 76 |
+
if conversation_history:
|
| 77 |
+
messages.extend(conversation_history)
|
| 78 |
+
messages.append({"role": "user", "content": user_input})
|
| 79 |
+
|
| 80 |
+
try:
|
| 81 |
+
kwargs: dict = {
|
| 82 |
+
"model": self.model,
|
| 83 |
+
"max_tokens": self.max_tokens,
|
| 84 |
+
"messages": messages,
|
| 85 |
+
}
|
| 86 |
+
if system_prompt:
|
| 87 |
+
kwargs["system"] = system_prompt
|
| 88 |
+
|
| 89 |
+
logger.debug(
|
| 90 |
+
"Streaming LLM request: model={} messages={} chars system_prompt",
|
| 91 |
+
self.model,
|
| 92 |
+
len(system_prompt),
|
| 93 |
+
)
|
| 94 |
+
|
| 95 |
+
async with self._client.messages.stream(**kwargs) as stream:
|
| 96 |
+
async for text in stream.text_stream:
|
| 97 |
+
yield text
|
| 98 |
+
|
| 99 |
+
except Exception as exc:
|
| 100 |
+
logger.error("Anthropic API error: {}. Falling back to offline.", exc)
|
| 101 |
+
async for token in self._offline_fallback(user_input):
|
| 102 |
+
yield token
|
| 103 |
+
|
| 104 |
+
async def _offline_fallback(self, user_input: str) -> AsyncIterator[str]:
|
| 105 |
+
"""Simple fallback when LLM is unavailable."""
|
| 106 |
+
fallback = (
|
| 107 |
+
"I'm currently operating in offline mode without access to my "
|
| 108 |
+
"full reasoning capabilities. I heard you say: \""
|
| 109 |
+
+ user_input
|
| 110 |
+
+ "\". Please configure an ANTHROPIC_API_KEY to enable full conversation."
|
| 111 |
+
)
|
| 112 |
+
# Yield word-by-word to simulate streaming
|
| 113 |
+
words = fallback.split(" ")
|
| 114 |
+
for i, word in enumerate(words):
|
| 115 |
+
if i < len(words) - 1:
|
| 116 |
+
yield word + " "
|
| 117 |
+
else:
|
| 118 |
+
yield word
|
eden_os/brain/streaming_bridge.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS -- Brain Engine: Streaming Bridge
|
| 3 |
+
Buffers LLM tokens until natural speech boundaries (sentence end, comma pause).
|
| 4 |
+
Yields TextChunk with is_sentence_end=True at boundaries.
|
| 5 |
+
Analyzes sentiment of each chunk for the emotion dict.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import re
|
| 11 |
+
from typing import AsyncIterator
|
| 12 |
+
|
| 13 |
+
from loguru import logger
|
| 14 |
+
|
| 15 |
+
from eden_os.shared.types import TextChunk
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
# Sentence-ending punctuation
|
| 19 |
+
_SENTENCE_END = re.compile(r"[.!?]+\s*$")
|
| 20 |
+
# Clause-boundary punctuation (comma, semicolon, colon, dash)
|
| 21 |
+
_CLAUSE_BREAK = re.compile(r"[,;:\u2014—-]+\s*$")
|
| 22 |
+
|
| 23 |
+
# Simple keyword lists for lightweight sentiment analysis
|
| 24 |
+
_JOY_WORDS = {
|
| 25 |
+
"happy", "glad", "great", "wonderful", "excellent", "love", "amazing",
|
| 26 |
+
"fantastic", "beautiful", "delighted", "excited", "joy", "pleased",
|
| 27 |
+
"cheerful", "thrilled", "awesome", "perfect", "brilliant",
|
| 28 |
+
}
|
| 29 |
+
_SAD_WORDS = {
|
| 30 |
+
"sorry", "sad", "unfortunately", "regret", "loss", "difficult",
|
| 31 |
+
"painful", "tragic", "grief", "mourn", "disappoint", "unhappy",
|
| 32 |
+
}
|
| 33 |
+
_CONFIDENCE_WORDS = {
|
| 34 |
+
"certainly", "absolutely", "definitely", "sure", "confident",
|
| 35 |
+
"clearly", "obviously", "undoubtedly", "indeed", "precisely",
|
| 36 |
+
"exactly", "correct", "right",
|
| 37 |
+
}
|
| 38 |
+
_URGENCY_WORDS = {
|
| 39 |
+
"immediately", "urgent", "asap", "critical", "emergency", "hurry",
|
| 40 |
+
"quickly", "right away", "important", "now", "crucial",
|
| 41 |
+
}
|
| 42 |
+
_WARMTH_WORDS = {
|
| 43 |
+
"welcome", "thank", "please", "care", "help", "support", "understand",
|
| 44 |
+
"appreciate", "kind", "gentle", "warm", "comfort", "safe", "here for you",
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
# Minimum buffer size before we consider flushing at a clause break
|
| 48 |
+
_MIN_CLAUSE_BUFFER = 15
|
| 49 |
+
# Maximum buffer size -- force flush regardless
|
| 50 |
+
_MAX_BUFFER = 300
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
class StreamingBridge:
|
| 54 |
+
"""
|
| 55 |
+
Buffers streaming LLM tokens and yields TextChunk objects
|
| 56 |
+
at natural speech boundaries with per-chunk sentiment.
|
| 57 |
+
"""
|
| 58 |
+
|
| 59 |
+
def __init__(
|
| 60 |
+
self,
|
| 61 |
+
emotion_baseline: dict[str, float] | None = None,
|
| 62 |
+
) -> None:
|
| 63 |
+
"""
|
| 64 |
+
Args:
|
| 65 |
+
emotion_baseline: Default emotion values from the persona.
|
| 66 |
+
Sentiment analysis adjusts these values up/down.
|
| 67 |
+
"""
|
| 68 |
+
self._baseline = emotion_baseline or {
|
| 69 |
+
"joy": 0.5,
|
| 70 |
+
"sadness": 0.0,
|
| 71 |
+
"confidence": 0.7,
|
| 72 |
+
"urgency": 0.0,
|
| 73 |
+
"warmth": 0.6,
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
async def bridge(
|
| 77 |
+
self,
|
| 78 |
+
token_stream: AsyncIterator[str],
|
| 79 |
+
) -> AsyncIterator[TextChunk]:
|
| 80 |
+
"""
|
| 81 |
+
Consume raw token strings and yield TextChunk objects
|
| 82 |
+
at natural speech boundaries.
|
| 83 |
+
|
| 84 |
+
Args:
|
| 85 |
+
token_stream: Async iterator of raw LLM token strings.
|
| 86 |
+
|
| 87 |
+
Yields:
|
| 88 |
+
TextChunk with text, is_sentence_end flag, and emotion dict.
|
| 89 |
+
"""
|
| 90 |
+
buffer = ""
|
| 91 |
+
|
| 92 |
+
async for token in token_stream:
|
| 93 |
+
buffer += token
|
| 94 |
+
|
| 95 |
+
# Check for sentence end
|
| 96 |
+
if _SENTENCE_END.search(buffer):
|
| 97 |
+
chunk = self._make_chunk(buffer.strip(), is_sentence_end=True)
|
| 98 |
+
logger.debug("Sentence chunk: '{}'", buffer.strip()[:60])
|
| 99 |
+
buffer = ""
|
| 100 |
+
yield chunk
|
| 101 |
+
continue
|
| 102 |
+
|
| 103 |
+
# Check for clause break (only if buffer is long enough)
|
| 104 |
+
if len(buffer) >= _MIN_CLAUSE_BUFFER and _CLAUSE_BREAK.search(buffer):
|
| 105 |
+
chunk = self._make_chunk(buffer.strip(), is_sentence_end=False)
|
| 106 |
+
logger.debug("Clause chunk: '{}'", buffer.strip()[:60])
|
| 107 |
+
buffer = ""
|
| 108 |
+
yield chunk
|
| 109 |
+
continue
|
| 110 |
+
|
| 111 |
+
# Force flush on max buffer
|
| 112 |
+
if len(buffer) >= _MAX_BUFFER:
|
| 113 |
+
chunk = self._make_chunk(buffer.strip(), is_sentence_end=False)
|
| 114 |
+
logger.debug("Max-buffer chunk: '{}'", buffer.strip()[:60])
|
| 115 |
+
buffer = ""
|
| 116 |
+
yield chunk
|
| 117 |
+
|
| 118 |
+
# Flush remaining buffer
|
| 119 |
+
if buffer.strip():
|
| 120 |
+
chunk = self._make_chunk(buffer.strip(), is_sentence_end=True)
|
| 121 |
+
logger.debug("Final chunk: '{}'", buffer.strip()[:60])
|
| 122 |
+
yield chunk
|
| 123 |
+
|
| 124 |
+
def _make_chunk(self, text: str, is_sentence_end: bool) -> TextChunk:
|
| 125 |
+
"""Create a TextChunk with sentiment-analyzed emotion dict."""
|
| 126 |
+
emotion = self._analyze_sentiment(text)
|
| 127 |
+
return TextChunk(
|
| 128 |
+
text=text,
|
| 129 |
+
is_sentence_end=is_sentence_end,
|
| 130 |
+
emotion=emotion,
|
| 131 |
+
)
|
| 132 |
+
|
| 133 |
+
def _analyze_sentiment(self, text: str) -> dict[str, float]:
|
| 134 |
+
"""
|
| 135 |
+
Lightweight keyword-based sentiment analysis.
|
| 136 |
+
|
| 137 |
+
Starts from the persona's emotional baseline and adjusts
|
| 138 |
+
each dimension based on keyword presence in the text.
|
| 139 |
+
"""
|
| 140 |
+
words = set(text.lower().split())
|
| 141 |
+
emotion = dict(self._baseline)
|
| 142 |
+
|
| 143 |
+
# Count keyword hits per dimension
|
| 144 |
+
joy_hits = len(words & _JOY_WORDS)
|
| 145 |
+
sad_hits = len(words & _SAD_WORDS)
|
| 146 |
+
conf_hits = len(words & _CONFIDENCE_WORDS)
|
| 147 |
+
urg_hits = len(words & _URGENCY_WORDS)
|
| 148 |
+
warm_hits = len(words & _WARMTH_WORDS)
|
| 149 |
+
|
| 150 |
+
# Adjust: each hit shifts the value by 0.1, clamped to [0.0, 1.0]
|
| 151 |
+
_bump = 0.1
|
| 152 |
+
emotion["joy"] = _clamp(emotion["joy"] + joy_hits * _bump - sad_hits * _bump)
|
| 153 |
+
emotion["sadness"] = _clamp(emotion["sadness"] + sad_hits * _bump - joy_hits * 0.05)
|
| 154 |
+
emotion["confidence"] = _clamp(emotion["confidence"] + conf_hits * _bump)
|
| 155 |
+
emotion["urgency"] = _clamp(emotion["urgency"] + urg_hits * _bump)
|
| 156 |
+
emotion["warmth"] = _clamp(emotion["warmth"] + warm_hits * _bump)
|
| 157 |
+
|
| 158 |
+
return emotion
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
def _clamp(value: float, lo: float = 0.0, hi: float = 1.0) -> float:
|
| 162 |
+
"""Clamp a float to [lo, hi]."""
|
| 163 |
+
return max(lo, min(hi, value))
|
eden_os/brain/template_loader.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS -- Brain Engine: Template Loader
|
| 3 |
+
Loads and validates YAML templates from the templates/ directory.
|
| 4 |
+
Lists available templates. Returns validated config dicts.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
from typing import Any
|
| 11 |
+
|
| 12 |
+
import yaml
|
| 13 |
+
from loguru import logger
|
| 14 |
+
|
| 15 |
+
from eden_os.brain.persona_manager import PersonaManager, PersonaValidationError
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
# Default templates directory (project root / templates)
|
| 19 |
+
# Default templates directory (project root / templates)
_DEFAULT_TEMPLATES_DIR = Path(__file__).resolve().parent.parent.parent / "templates"


class TemplateLoader:
    """
    Discovers, loads, and validates YAML persona templates.

    Templates live in the ``templates/`` directory at the project root.
    Each ``.yaml`` file must conform to the agent template schema
    validated by :class:`PersonaManager`.
    """

    def __init__(self, templates_dir: str | Path | None = None) -> None:
        self.templates_dir = Path(templates_dir) if templates_dir else _DEFAULT_TEMPLATES_DIR
        logger.info("TemplateLoader using directory: {}", self.templates_dir)

    def list_templates(self) -> list[str]:
        """
        List available template names (without extension).

        Returns:
            Sorted list of template names, e.g. ``["default", "medical_office"]``.
        """
        if not self.templates_dir.is_dir():
            logger.warning("Templates directory not found: {}", self.templates_dir)
            return []

        templates = sorted(
            p.stem for p in self.templates_dir.glob("*.yaml")
        )
        logger.debug("Available templates: {}", templates)
        return templates

    def load(self, template_name: str) -> dict[str, Any]:
        """
        Load and validate a template by name.

        Args:
            template_name: Template name (without ``.yaml`` extension),
                or a full file path.

        Returns:
            Validated config dict with ``agent`` top-level key.

        Raises:
            FileNotFoundError: If template file does not exist.
            PersonaValidationError: If template fails schema validation
                or the file is empty.
            yaml.YAMLError: If YAML parsing fails.
        """
        path = self._resolve_path(template_name)

        logger.info("Loading template: {}", path)
        config = self._read_and_validate(path)
        logger.info("Template '{}' loaded and validated", template_name)
        return config

    def load_from_path(self, path: str | Path) -> dict[str, Any]:
        """
        Load and validate a template from an explicit file path.

        Args:
            path: Absolute or relative path to a YAML template file.

        Returns:
            Validated config dict.

        Raises:
            FileNotFoundError: If the file does not exist.
            PersonaValidationError: If the template fails validation
                or the file is empty.
        """
        path = Path(path)
        if not path.exists():
            raise FileNotFoundError(f"Template file not found: {path}")

        config = self._read_and_validate(path)
        logger.info("Template loaded from path: {}", path)
        return config

    @staticmethod
    def _read_and_validate(path: Path) -> dict[str, Any]:
        """Parse a YAML file and validate it against the agent schema.

        Shared by :meth:`load` and :meth:`load_from_path`, which previously
        duplicated this read/parse/empty-check/validate sequence.
        """
        with open(path, "r", encoding="utf-8") as fh:
            config = yaml.safe_load(fh)

        if config is None:
            raise PersonaValidationError(f"Template file is empty: {path}")

        # Validate via PersonaManager's static validator
        PersonaManager._validate(config)
        return config

    def _resolve_path(self, template_name: str) -> Path:
        """Resolve a template name to a file path, raising if not found."""
        # If it already looks like a path, use it directly
        candidate = Path(template_name)
        if candidate.suffix == ".yaml" and candidate.exists():
            return candidate

        # Look in templates directory
        path = self.templates_dir / f"{template_name}.yaml"
        if path.exists():
            return path

        # Try without adding extension (user passed "foo.yaml")
        path_raw = self.templates_dir / template_name
        if path_raw.exists():
            return path_raw

        raise FileNotFoundError(
            f"Template '{template_name}' not found in {self.templates_dir}. "
            f"Available: {self.list_templates()}"
        )
|
eden_os/conductor/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS — Conductor Engine (Agent 5)
|
| 3 |
+
Pipeline Orchestrator + State Manager.
|
| 4 |
+
|
| 5 |
+
Exports:
|
| 6 |
+
Conductor — implements IConductor interface.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from eden_os.conductor.orchestrator import Conductor
|
| 10 |
+
|
| 11 |
+
__all__ = ["Conductor"]
|
eden_os/conductor/error_recovery.py
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS — Conductor: Error Recovery
|
| 3 |
+
Graceful failure handling with fallback chains per engine.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
from dataclasses import dataclass, field
|
| 9 |
+
from enum import Enum
|
| 10 |
+
from typing import Any
|
| 11 |
+
|
| 12 |
+
from loguru import logger
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class FallbackAction(Enum):
    """Actions the conductor can take when an engine fails."""
    RETRY = "retry"                             # re-invoke the failing engine
    USE_LOCAL_FALLBACK = "use_local_fallback"   # switch to a local substitute (e.g. local LLM)
    SILENT_AUDIO = "silent_audio"               # emit silence so downstream stages keep running
    FREEZE_LAST_FRAME = "freeze_last_frame"     # hold the last good video frame
    SKIP = "skip"                               # drop this stage and continue the pipeline
    ABORT = "abort"                             # give up on the request
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# Ordered fallback chains per engine.
# The conductor tries each action in order until one succeeds.
# Engines absent from this table fall back straight to ABORT
# (see ErrorRecovery.handle_error's ``.get`` default).
_FALLBACK_CHAINS: dict[str, list[FallbackAction]] = {
    "brain": [
        FallbackAction.RETRY,
        FallbackAction.USE_LOCAL_FALLBACK,  # Qwen/BitNet local LLM
        FallbackAction.ABORT,
    ],
    "voice": [
        FallbackAction.RETRY,
        FallbackAction.SILENT_AUDIO,  # Emit silent audio so animator keeps running
        FallbackAction.ABORT,
    ],
    "animator": [
        FallbackAction.RETRY,
        FallbackAction.FREEZE_LAST_FRAME,  # Hold the last good frame
        FallbackAction.ABORT,
    ],
    "genesis": [
        FallbackAction.RETRY,
        FallbackAction.ABORT,
    ],
    "scholar": [
        FallbackAction.RETRY,
        FallbackAction.SKIP,  # Non-critical — conversation can continue without knowledge
        FallbackAction.ABORT,
    ],
    "asr": [
        FallbackAction.RETRY,
        FallbackAction.SKIP,  # Fall back to text-only input
        FallbackAction.ABORT,
    ],
}

# Maximum consecutive RETRY attempts per engine before advancing to the
# next action in its chain (tracked by _EngineErrorState.retries_used).
MAX_RETRIES = 2
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
@dataclass
class _EngineErrorState:
    # Running counters for a single engine; cleared via ErrorRecovery helpers.
    total_errors: int = 0
    consecutive_errors: int = 0
    last_error: str = ""
    retries_used: int = 0


class ErrorRecovery:
    """Per-engine error bookkeeping that maps failures to fallback actions.

    Usage::

        recovery = ErrorRecovery()
        action = recovery.handle_error("brain", error)
        if action == FallbackAction.USE_LOCAL_FALLBACK:
            # switch to local LLM
            ...
    """

    def __init__(self) -> None:
        self._states: dict[str, _EngineErrorState] = {}

    def _get(self, engine: str) -> _EngineErrorState:
        # Lazily create tracking state the first time an engine is seen.
        return self._states.setdefault(engine, _EngineErrorState())

    def handle_error(self, engine_name: str, error: Exception | str) -> FallbackAction:
        """Record an error for *engine_name* and return the recommended fallback.

        The fallback progresses through the chain as consecutive errors
        accumulate (retry up to MAX_RETRIES, then next fallback, etc.).
        """
        tracked = self._get(engine_name)
        tracked.total_errors += 1
        tracked.consecutive_errors += 1
        tracked.last_error = str(error)

        for step in _FALLBACK_CHAINS.get(engine_name, [FallbackAction.ABORT]):
            if step is not FallbackAction.RETRY:
                # Non-retry actions are taken immediately.
                logger.error(
                    "Engine '{}' error — fallback to {}: {}",
                    engine_name,
                    step.value,
                    error,
                )
                return step
            if tracked.retries_used >= MAX_RETRIES:
                continue  # retry budget spent — advance to the next fallback
            tracked.retries_used += 1
            logger.warning(
                "Engine '{}' error (retry {}/{}): {}",
                engine_name,
                tracked.retries_used,
                MAX_RETRIES,
                error,
            )
            return FallbackAction.RETRY

        # Chain fully exhausted.
        logger.critical("Engine '{}' — all fallbacks exhausted, aborting: {}", engine_name, error)
        return FallbackAction.ABORT

    def clear_engine(self, engine_name: str) -> None:
        """Reset error state for an engine (e.g. after a successful call)."""
        tracked = self._get(engine_name)
        tracked.consecutive_errors = 0
        tracked.retries_used = 0

    def get_error_stats(self) -> dict[str, dict]:
        """Return error statistics for all engines that have recorded errors."""
        return {
            name: {
                "total_errors": st.total_errors,
                "consecutive_errors": st.consecutive_errors,
                "last_error": st.last_error,
                "retries_used": st.retries_used,
            }
            for name, st in self._states.items()
        }

    def reset(self) -> None:
        """Clear all error state."""
        self._states.clear()
|
eden_os/conductor/latency_enforcer.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS — Conductor: Latency Enforcer
|
| 3 |
+
Monitors per-stage pipeline latency and enforces budgets.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
import time
|
| 9 |
+
from dataclasses import dataclass, field
|
| 10 |
+
|
| 11 |
+
from loguru import logger
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# Budget limits in milliseconds (from CLAUDE.md spec)
DEFAULT_BUDGETS: dict[str, float] = {
    "asr": 500.0,
    "llm_first_token": 200.0,
    "tts_first_chunk": 300.0,
    "animation_frame": 50.0,
    "total": 1500.0,
}

# Default number of samples retained per stage for reporting
# (previously a hard-coded 200 inside end_stage).
DEFAULT_HISTORY_SIZE = 200


@dataclass
class _StageTimer:
    """Internal tracker for a single stage invocation."""
    name: str
    start_ns: int = 0       # perf_counter_ns captured at start_stage
    end_ns: int = 0         # perf_counter_ns captured at end_stage
    elapsed_ms: float = 0.0
    finished: bool = False


class LatencyEnforcer:
    """Tracks wall-clock latency per pipeline stage and warns on budget overruns.

    Usage::

        enforcer = LatencyEnforcer()
        enforcer.start_stage("asr")
        # ... do ASR work ...
        enforcer.end_stage("asr")
        if not enforcer.check_budget("asr"):
            logger.warning("ASR exceeded budget")
    """

    def __init__(
        self,
        budgets: dict[str, float] | None = None,
        history_size: int = DEFAULT_HISTORY_SIZE,
    ) -> None:
        """Create an enforcer.

        Args:
            budgets: Optional per-stage budget overrides (ms), merged
                over ``DEFAULT_BUDGETS``.
            history_size: Rolling-window length per stage. Previously a
                hard-coded 200; now configurable with the same default,
                so existing callers are unaffected.
        """
        self._budgets: dict[str, float] = {**DEFAULT_BUDGETS, **(budgets or {})}
        self._history_size = history_size
        self._active: dict[str, _StageTimer] = {}
        self._history: dict[str, list[float]] = {}  # stage -> list of elapsed_ms

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def start_stage(self, name: str) -> None:
        """Mark the beginning of a pipeline stage."""
        self._active[name] = _StageTimer(name=name, start_ns=time.perf_counter_ns())

    def end_stage(self, name: str) -> float:
        """Mark the end of a pipeline stage. Returns elapsed ms (0.0 if unmatched)."""
        timer = self._active.get(name)
        if timer is None:
            # Mismatched call — report it but don't raise; timing is best-effort.
            logger.warning("end_stage called for '{}' but no matching start_stage", name)
            return 0.0

        timer.end_ns = time.perf_counter_ns()
        timer.elapsed_ms = (timer.end_ns - timer.start_ns) / 1_000_000
        timer.finished = True

        # Store in rolling history (bounded by history_size).
        hist = self._history.setdefault(name, [])
        hist.append(timer.elapsed_ms)
        if len(hist) > self._history_size:
            hist.pop(0)

        # Check budget and warn
        budget = self._budgets.get(name)
        if budget is not None and timer.elapsed_ms > budget:
            logger.warning(
                "LATENCY BUDGET EXCEEDED — stage '{}': {:.1f}ms (budget {:.0f}ms)",
                name,
                timer.elapsed_ms,
                budget,
            )

        del self._active[name]
        return timer.elapsed_ms

    def check_budget(self, name: str) -> bool:
        """Return True if the *last recorded* latency for *name* is within budget.

        If no history or no budget exists yet, returns True (optimistic).
        """
        hist = self._history.get(name)
        if not hist:
            return True
        budget = self._budgets.get(name)
        if budget is None:
            return True
        return hist[-1] <= budget

    def get_last(self, name: str) -> float:
        """Return the last recorded latency (ms) for a stage, or 0.0."""
        hist = self._history.get(name)
        return hist[-1] if hist else 0.0

    def get_report(self) -> dict:
        """Return a summary report of all tracked stages.

        Returns a dict keyed by stage name, each containing:
        - last_ms, avg_ms, min_ms, max_ms, budget_ms, within_budget, count
        """
        report: dict[str, dict] = {}
        for name, hist in self._history.items():
            budget = self._budgets.get(name, float("inf"))
            last = hist[-1] if hist else 0.0
            report[name] = {
                "last_ms": round(last, 2),
                "avg_ms": round(sum(hist) / len(hist), 2) if hist else 0.0,
                "min_ms": round(min(hist), 2) if hist else 0.0,
                "max_ms": round(max(hist), 2) if hist else 0.0,
                "budget_ms": budget,
                "within_budget": last <= budget,
                "count": len(hist),
            }
        return report

    def reset(self) -> None:
        """Clear all history and active timers."""
        self._active.clear()
        self._history.clear()
|
eden_os/conductor/metrics_collector.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS — Conductor: Metrics Collector
|
| 3 |
+
Real-time pipeline performance metrics with rolling-window percentiles.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
import time
|
| 9 |
+
from collections import defaultdict
|
| 10 |
+
from typing import Any
|
| 11 |
+
|
| 12 |
+
from loguru import logger
|
| 13 |
+
|
| 14 |
+
from eden_os.shared.types import PipelineMetrics
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
_WINDOW_SIZE = 100  # rolling window for percentile calculation


class MetricsCollector:
    """Collects and summarises pipeline metrics.

    Named numeric samples are kept in per-metric rolling windows. The
    collector can emit a typed ``PipelineMetrics`` snapshot of the latest
    values, and a richer ``dict`` summary with percentiles, counts, and
    per-engine error rates.
    """

    def __init__(self, window_size: int = _WINDOW_SIZE) -> None:
        self._window_size = window_size
        self._data: dict[str, list[float]] = defaultdict(list)
        self._error_counts: dict[str, int] = defaultdict(int)
        self._total_calls: dict[str, int] = defaultdict(int)
        self._start_time = time.monotonic()

    # ------------------------------------------------------------------
    # Recording
    # ------------------------------------------------------------------

    def record(self, metric_name: str, value: float) -> None:
        """Append *value* to the rolling window for *metric_name*."""
        window = self._data[metric_name]
        window.append(value)
        if len(window) > self._window_size:
            window.pop(0)  # evict the oldest sample
        self._total_calls[metric_name] += 1

    def record_error(self, engine_name: str) -> None:
        """Increment the error counter for *engine_name*."""
        self._error_counts[engine_name] += 1

    # ------------------------------------------------------------------
    # Percentile helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _percentile(sorted_vals: list[float], p: float) -> float:
        """Return the *p*-th percentile (0-100) from a **sorted** list."""
        if not sorted_vals:
            return 0.0
        rank = (len(sorted_vals) - 1) * (p / 100.0)
        lo = int(rank)
        hi = lo + 1
        if hi >= len(sorted_vals):
            return sorted_vals[lo]
        # Linear interpolation between the two bracketing samples.
        return sorted_vals[lo] + (rank - lo) * (sorted_vals[hi] - sorted_vals[lo])

    def _pcts(self, name: str) -> dict[str, float]:
        ordered = sorted(self._data.get(name, []))
        return {
            f"p{q}": round(self._percentile(ordered, q), 2)
            for q in (50, 95, 99)
        }

    # ------------------------------------------------------------------
    # Public queries
    # ------------------------------------------------------------------

    def get_metrics(self) -> PipelineMetrics:
        """Build a ``PipelineMetrics`` snapshot from the latest values."""
        def _latest(name: str) -> float:
            window = self._data.get(name)
            return window[-1] if window else 0.0

        return PipelineMetrics(
            asr_latency_ms=_latest("asr_ms"),
            llm_first_token_ms=_latest("llm_first_token_ms"),
            tts_first_chunk_ms=_latest("tts_first_chunk_ms"),
            animation_fps=_latest("animation_fps"),
            total_latency_ms=_latest("total_ms"),
            gpu_memory_used_mb=_latest("gpu_memory_used_mb"),
            gpu_memory_total_mb=_latest("gpu_memory_total_mb"),
        )

    def get_summary(self) -> dict[str, Any]:
        """Return a rich summary with percentiles, counts, and error rates."""
        stages: dict[str, Any] = {}
        for name, window in self._data.items():
            if not window:
                continue
            ordered = sorted(window)
            stages[name] = {
                "last": round(window[-1], 2),
                "avg": round(sum(window) / len(window), 2),
                "min": round(ordered[0], 2),
                "max": round(ordered[-1], 2),
                "count": self._total_calls.get(name, len(window)),
                **self._pcts(name),
            }

        # Error rates (errors / total calls) per engine.
        error_rates: dict[str, Any] = {}
        for eng in set(self._error_counts.keys()) | set(self._total_calls.keys()):
            calls = self._total_calls.get(eng, 0)
            errors = self._error_counts.get(eng, 0)
            error_rates[eng] = {
                "errors": errors,
                "total": calls,
                "rate": round(errors / calls, 4) if calls > 0 else 0.0,
            }

        return {
            "uptime_seconds": round(time.monotonic() - self._start_time, 1),
            "stages": stages,
            "error_rates": error_rates,
        }

    def reset(self) -> None:
        """Clear all collected data."""
        self._data.clear()
        self._error_counts.clear()
        self._total_calls.clear()
        self._start_time = time.monotonic()
|
eden_os/conductor/orchestrator.py
ADDED
|
@@ -0,0 +1,380 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS — Conductor: Orchestrator (Agent 5)
|
| 3 |
+
Master controller that wires all engines together and manages the
|
| 4 |
+
full ASR -> Brain -> Voice TTS -> Animator pipeline.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
import asyncio
|
| 10 |
+
from typing import Any, AsyncIterator
|
| 11 |
+
|
| 12 |
+
from loguru import logger
|
| 13 |
+
|
| 14 |
+
from eden_os.shared.interfaces import IConductor
|
| 15 |
+
from eden_os.shared.types import (
|
| 16 |
+
AudioChunk,
|
| 17 |
+
AvatarState,
|
| 18 |
+
PipelineMetrics,
|
| 19 |
+
SessionConfig,
|
| 20 |
+
TextChunk,
|
| 21 |
+
VideoFrame,
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
from eden_os.conductor.session_manager import SessionManager, SessionState
|
| 25 |
+
from eden_os.conductor.latency_enforcer import LatencyEnforcer
|
| 26 |
+
from eden_os.conductor.error_recovery import ErrorRecovery, FallbackAction
|
| 27 |
+
from eden_os.conductor.metrics_collector import MetricsCollector
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class Conductor(IConductor):
|
| 31 |
+
"""Agent 5 — Pipeline Orchestrator.
|
| 32 |
+
|
| 33 |
+
Single entry point for EDEN OS. Initialises engines lazily (at
|
| 34 |
+
session-creation time) and routes data through the full pipeline::
|
| 35 |
+
|
| 36 |
+
User audio -> ASR -> Brain LLM -> Voice TTS -> Animator -> Video frames
|
| 37 |
+
|
| 38 |
+
Parameters
|
| 39 |
+
----------
|
| 40 |
+
config : dict
|
| 41 |
+
Top-level configuration. Recognised keys:
|
| 42 |
+
|
| 43 |
+
* ``hardware_profile`` (str) – ``"auto"`` / ``"cpu"`` / ``"cuda"`` etc.
|
| 44 |
+
* ``models_cache`` (str) – path where HF model weights are cached.
|
| 45 |
+
"""
|
| 46 |
+
|
| 47 |
+
def __init__(self, config: dict | None = None) -> None:
    """Create the conductor and its engine-independent subsystems.

    Args:
        config: Optional top-level configuration dict (see class
            docstring for recognised keys). Defaults to an empty dict.
    """
    self._config: dict = config or {}
    self._hardware_profile: str = self._config.get("hardware_profile", "auto")
    self._models_cache: str = self._config.get("models_cache", "models_cache")

    # Supporting subsystems — always available, no heavy model deps.
    self._sessions = SessionManager()
    self._latency = LatencyEnforcer()
    self._recovery = ErrorRecovery()
    self._metrics = MetricsCollector()

    # Engine singletons — populated lazily on first session creation
    # (see _ensure_engines); None until then.
    self._brain = None
    self._voice = None
    self._animator = None
    self._genesis = None
    self._scholar = None
    self._engines_loaded = False

    logger.info(
        "Conductor initialised (hardware={}, cache={})",
        self._hardware_profile,
        self._models_cache,
    )
|
| 70 |
+
|
| 71 |
+
# ------------------------------------------------------------------
|
| 72 |
+
# Lazy engine loading (import inside method to avoid top-level deps)
|
| 73 |
+
# ------------------------------------------------------------------
|
| 74 |
+
|
| 75 |
+
def _ensure_engines(self) -> None:
    """Import and instantiate all engine singletons once.

    Imports are deferred to this method so that merely constructing a
    Conductor does not pull in heavy engine dependencies; subsequent
    calls are no-ops thanks to the ``_engines_loaded`` flag.
    """
    if self._engines_loaded:
        return

    logger.info("Conductor: loading engines...")

    from eden_os.brain import BrainEngine
    from eden_os.voice import VoiceEngine
    from eden_os.animator import AnimatorEngine
    from eden_os.genesis import GenesisEngine
    from eden_os.scholar import ScholarEngine

    self._brain = BrainEngine()
    self._voice = VoiceEngine()
    self._animator = AnimatorEngine()
    self._genesis = GenesisEngine()
    self._scholar = ScholarEngine()
    self._engines_loaded = True

    logger.info("Conductor: all engines loaded")
|
| 96 |
+
|
| 97 |
+
# ------------------------------------------------------------------
|
| 98 |
+
# IConductor interface
|
| 99 |
+
# ------------------------------------------------------------------
|
| 100 |
+
|
| 101 |
+
async def create_session(self, config: SessionConfig) -> str:
    """Create a session: init engines, process portrait, pre-cache idle.

    Args:
        config: Session configuration. ``portrait_image`` (if set) is
            run through Genesis; ``template_name`` (if set and not
            "default") selects the Brain persona.

    Returns:
        The session_id. The session reaches READY even when portrait
        processing or persona loading fails — those errors are routed
        through recovery/logging rather than raised.
    """
    self._ensure_engines()

    sid = self._sessions.create(config)
    session = self._sessions.get(sid)
    self._sessions.set_state(sid, SessionState.INITIALIZING)

    # Store engine refs on session for convenience
    session.engines = {
        "brain": self._brain,
        "voice": self._voice,
        "animator": self._animator,
        "genesis": self._genesis,
        "scholar": self._scholar,
    }

    # Run Genesis portrait processing if an image was provided
    if config.portrait_image is not None:
        stage = "genesis_upload"
        try:
            self._latency.start_stage(stage)
            profile = await self._genesis.process_upload(config.portrait_image)
            self._latency.end_stage(stage)
            session.portrait_profile = profile

            # Pre-cache idle animations so avatar is alive on load
            stage = "genesis_idle_cache"
            self._latency.start_stage(stage)
            idle_cache = await self._genesis.precompute_idle_cache(profile)
            self._latency.end_stage(stage)
            session.idle_cache = idle_cache

            self._recovery.clear_engine("genesis")
            logger.info("Session '{}': portrait processed & idle cached", sid)

        except Exception as exc:
            # Bug fix: close whichever stage timer was still open so the
            # latency enforcer is not left with a stale active timer
            # (the original skipped end_stage on failure; the other
            # pipeline stages close their timers in except blocks).
            self._latency.end_stage(stage)
            action = self._recovery.handle_error("genesis", exc)
            logger.error("Session '{}': genesis init failed ({}): {}", sid, action.value, exc)

    # Load persona if a non-default template was specified
    if config.template_name and config.template_name != "default":
        try:
            await self._brain.load_persona(config.template_name)
        except Exception as exc:
            # Persona load is best-effort; the default persona remains active.
            logger.warning("Session '{}': persona load failed: {}", sid, exc)

    self._sessions.set_state(sid, SessionState.READY)
    logger.info("Session '{}' is READY", sid)
    return sid
|
| 152 |
+
|
| 153 |
+
async def start_conversation(self, session_id: str) -> None:
    """Begin the conversation: start idle animation + ASR listening.

    Raises:
        KeyError: if *session_id* is unknown (the session-manager lookup
            raises KeyError, as handled in end_conversation).
    """
    # Look the session up first so an unknown id fails before any state change.
    session = self._sessions.get(session_id)
    self._sessions.set_state(session_id, SessionState.ACTIVE)
    session.avatar_state = AvatarState.IDLE

    logger.info("Session '{}': conversation started (IDLE + ASR listening)", session_id)
|
| 160 |
+
|
| 161 |
+
async def end_conversation(self, session_id: str) -> None:
    """End the session and clean up resources.

    Tolerant of unknown ids: a missing session is logged and ignored
    rather than raised, so double-ends are harmless.
    """
    try:
        session = self._sessions.get(session_id)
    except KeyError:
        logger.warning("end_conversation: session '{}' not found", session_id)
        return

    # Mark ended and park the avatar before tearing the session down.
    self._sessions.set_state(session_id, SessionState.ENDED)
    session.avatar_state = AvatarState.IDLE
    self._sessions.destroy(session_id)
    logger.info("Session '{}': conversation ended and cleaned up", session_id)
|
| 173 |
+
|
| 174 |
+
async def handle_user_input(
|
| 175 |
+
self,
|
| 176 |
+
session_id: str,
|
| 177 |
+
text_or_audio: str | AudioChunk,
|
| 178 |
+
) -> AsyncIterator[VideoFrame]:
|
| 179 |
+
"""Route user input through the full pipeline.
|
| 180 |
+
|
| 181 |
+
Supports both text (str) and audio (AudioChunk). For text, the
|
| 182 |
+
ASR stage is skipped.
|
| 183 |
+
|
| 184 |
+
Yields ``VideoFrame`` objects as the animator produces them.
|
| 185 |
+
"""
|
| 186 |
+
session = self._sessions.get(session_id)
|
| 187 |
+
|
| 188 |
+
# ----------------------------------------------------------
|
| 189 |
+
# Total pipeline timer
|
| 190 |
+
# ----------------------------------------------------------
|
| 191 |
+
self._latency.start_stage("total")
|
| 192 |
+
|
| 193 |
+
# ----------------------------------------------------------
|
| 194 |
+
# 1. ASR (only for audio input)
|
| 195 |
+
# ----------------------------------------------------------
|
| 196 |
+
if isinstance(text_or_audio, AudioChunk):
|
| 197 |
+
session.avatar_state = AvatarState.LISTENING
|
| 198 |
+
self._latency.start_stage("asr")
|
| 199 |
+
try:
|
| 200 |
+
# Wrap single chunk as an async iterator for transcribe_stream
|
| 201 |
+
async def _audio_iter():
|
| 202 |
+
yield text_or_audio
|
| 203 |
+
|
| 204 |
+
transcript_parts: list[str] = []
|
| 205 |
+
async for part in self._voice.transcribe_stream(_audio_iter()):
|
| 206 |
+
transcript_parts.append(part)
|
| 207 |
+
|
| 208 |
+
user_text = " ".join(transcript_parts)
|
| 209 |
+
asr_ms = self._latency.end_stage("asr")
|
| 210 |
+
self._metrics.record("asr_ms", asr_ms)
|
| 211 |
+
self._recovery.clear_engine("asr")
|
| 212 |
+
|
| 213 |
+
except Exception as exc:
|
| 214 |
+
self._latency.end_stage("asr")
|
| 215 |
+
action = self._recovery.handle_error("asr", exc)
|
| 216 |
+
self._metrics.record_error("asr")
|
| 217 |
+
if action == FallbackAction.SKIP:
|
| 218 |
+
logger.warning("ASR failed, cannot process audio input")
|
| 219 |
+
self._latency.end_stage("total")
|
| 220 |
+
return
|
| 221 |
+
raise
|
| 222 |
+
else:
|
| 223 |
+
user_text = text_or_audio
|
| 224 |
+
|
| 225 |
+
# Store in history
|
| 226 |
+
self._sessions.add_history(session_id, "user", user_text)
|
| 227 |
+
await self._brain.process_user_input(user_text)
|
| 228 |
+
|
| 229 |
+
# ----------------------------------------------------------
|
| 230 |
+
# 2. Brain LLM (streaming)
|
| 231 |
+
# ----------------------------------------------------------
|
| 232 |
+
session.avatar_state = AvatarState.THINKING
|
| 233 |
+
|
| 234 |
+
self._latency.start_stage("llm_first_token")
|
| 235 |
+
first_token_recorded = False
|
| 236 |
+
text_chunks: list[TextChunk] = []
|
| 237 |
+
|
| 238 |
+
try:
|
| 239 |
+
context = await self._brain.get_context()
|
| 240 |
+
|
| 241 |
+
async def _llm_stream() -> AsyncIterator[TextChunk]:
|
| 242 |
+
nonlocal first_token_recorded
|
| 243 |
+
async for chunk in self._brain.reason_stream(user_text, context):
|
| 244 |
+
if not first_token_recorded:
|
| 245 |
+
llm_ms = self._latency.end_stage("llm_first_token")
|
| 246 |
+
self._metrics.record("llm_first_token_ms", llm_ms)
|
| 247 |
+
first_token_recorded = True
|
| 248 |
+
text_chunks.append(chunk)
|
| 249 |
+
yield chunk
|
| 250 |
+
|
| 251 |
+
self._recovery.clear_engine("brain")
|
| 252 |
+
|
| 253 |
+
except Exception as exc:
|
| 254 |
+
if not first_token_recorded:
|
| 255 |
+
self._latency.end_stage("llm_first_token")
|
| 256 |
+
action = self._recovery.handle_error("brain", exc)
|
| 257 |
+
self._metrics.record_error("brain")
|
| 258 |
+
|
| 259 |
+
if action == FallbackAction.USE_LOCAL_FALLBACK:
|
| 260 |
+
# Yield a canned fallback response
|
| 261 |
+
fallback = TextChunk(
|
| 262 |
+
text="I'm sorry, I'm having trouble thinking right now. Could you try again?",
|
| 263 |
+
is_sentence_end=True,
|
| 264 |
+
)
|
| 265 |
+
|
| 266 |
+
async def _llm_stream():
|
| 267 |
+
yield fallback
|
| 268 |
+
|
| 269 |
+
text_chunks.append(fallback)
|
| 270 |
+
else:
|
| 271 |
+
self._latency.end_stage("total")
|
| 272 |
+
raise
|
| 273 |
+
|
| 274 |
+
# ----------------------------------------------------------
|
| 275 |
+
# 3. Voice TTS (streaming from LLM chunks)
|
| 276 |
+
# ----------------------------------------------------------
|
| 277 |
+
session.avatar_state = AvatarState.SPEAKING
|
| 278 |
+
|
| 279 |
+
self._latency.start_stage("tts_first_chunk")
|
| 280 |
+
first_tts_recorded = False
|
| 281 |
+
|
| 282 |
+
try:
|
| 283 |
+
async def _tts_audio_stream() -> AsyncIterator[AudioChunk]:
|
| 284 |
+
nonlocal first_tts_recorded
|
| 285 |
+
async for audio_chunk in self._voice.synthesize_stream(_llm_stream()):
|
| 286 |
+
if not first_tts_recorded:
|
| 287 |
+
tts_ms = self._latency.end_stage("tts_first_chunk")
|
| 288 |
+
self._metrics.record("tts_first_chunk_ms", tts_ms)
|
| 289 |
+
first_tts_recorded = True
|
| 290 |
+
yield audio_chunk
|
| 291 |
+
|
| 292 |
+
self._recovery.clear_engine("voice")
|
| 293 |
+
|
| 294 |
+
except Exception as exc:
|
| 295 |
+
if not first_tts_recorded:
|
| 296 |
+
self._latency.end_stage("tts_first_chunk")
|
| 297 |
+
action = self._recovery.handle_error("voice", exc)
|
| 298 |
+
self._metrics.record_error("voice")
|
| 299 |
+
|
| 300 |
+
if action == FallbackAction.SILENT_AUDIO:
|
| 301 |
+
import numpy as np
|
| 302 |
+
silent = AudioChunk(
|
| 303 |
+
data=np.zeros(16000, dtype=np.float32),
|
| 304 |
+
sample_rate=16000,
|
| 305 |
+
duration_ms=1000.0,
|
| 306 |
+
is_final=True,
|
| 307 |
+
)
|
| 308 |
+
|
| 309 |
+
async def _tts_audio_stream():
|
| 310 |
+
yield silent
|
| 311 |
+
else:
|
| 312 |
+
self._latency.end_stage("total")
|
| 313 |
+
raise
|
| 314 |
+
|
| 315 |
+
# ----------------------------------------------------------
|
| 316 |
+
# 4. Animator (audio -> video frames)
|
| 317 |
+
# ----------------------------------------------------------
|
| 318 |
+
try:
|
| 319 |
+
frame_count = 0
|
| 320 |
+
async for frame in self._animator.drive_from_audio(_tts_audio_stream()):
|
| 321 |
+
self._latency.start_stage("animation_frame")
|
| 322 |
+
elapsed = self._latency.end_stage("animation_frame")
|
| 323 |
+
self._metrics.record("animation_frame_ms", elapsed)
|
| 324 |
+
frame_count += 1
|
| 325 |
+
yield frame
|
| 326 |
+
|
| 327 |
+
if frame_count > 0:
|
| 328 |
+
# Approximate fps from frame count (rough)
|
| 329 |
+
self._metrics.record("animation_fps", float(frame_count))
|
| 330 |
+
self._recovery.clear_engine("animator")
|
| 331 |
+
|
| 332 |
+
except Exception as exc:
|
| 333 |
+
action = self._recovery.handle_error("animator", exc)
|
| 334 |
+
self._metrics.record_error("animator")
|
| 335 |
+
if action == FallbackAction.FREEZE_LAST_FRAME:
|
| 336 |
+
logger.warning("Animator failed — freezing last frame")
|
| 337 |
+
# Don't raise; the pipeline simply stops yielding frames
|
| 338 |
+
else:
|
| 339 |
+
raise
|
| 340 |
+
finally:
|
| 341 |
+
total_ms = self._latency.end_stage("total")
|
| 342 |
+
self._metrics.record("total_ms", total_ms)
|
| 343 |
+
|
| 344 |
+
# Store assistant response in history
|
| 345 |
+
full_response = "".join(c.text for c in text_chunks)
|
| 346 |
+
self._sessions.add_history(session_id, "assistant", full_response)
|
| 347 |
+
|
| 348 |
+
session.avatar_state = AvatarState.IDLE
|
| 349 |
+
logger.debug(
|
| 350 |
+
"Session '{}': pipeline complete ({:.0f}ms total, {} frames)",
|
| 351 |
+
session_id,
|
| 352 |
+
total_ms,
|
| 353 |
+
frame_count,
|
| 354 |
+
)
|
| 355 |
+
|
| 356 |
+
async def get_metrics(self, session_id: str) -> PipelineMetrics:
    """Return the current pipeline metrics for *session_id*.

    The session lookup is performed purely for validation: the session
    manager raises ``KeyError`` for an unknown id, and its return value
    is intentionally discarded.
    """
    # Validate that the session exists before reporting metrics.
    _ = self._sessions.get(session_id)
    return self._metrics.get_metrics()
|
| 361 |
+
|
| 362 |
+
# ------------------------------------------------------------------
|
| 363 |
+
# Extended API (beyond IConductor)
|
| 364 |
+
# ------------------------------------------------------------------
|
| 365 |
+
|
| 366 |
+
def get_latency_report(self) -> dict:
    """Return the full latency report, covering every pipeline stage.

    Thin delegation to the latency enforcer's own report builder.
    """
    report = self._latency.get_report()
    return report
|
| 369 |
+
|
| 370 |
+
def get_error_stats(self) -> dict:
    """Return error statistics for all engines.

    Delegates to the error-recovery manager; the stats object is
    returned as-is (no copy).
    """
    stats = self._recovery.get_error_stats()
    return stats
|
| 373 |
+
|
| 374 |
+
def get_metrics_summary(self) -> dict:
    """Return the rich metrics summary (includes percentiles).

    Delegates to the metrics collector's summary builder.
    """
    summary = self._metrics.get_summary()
    return summary
|
| 377 |
+
|
| 378 |
+
def get_session_manager(self) -> SessionManager:
    """Return the underlying session manager.

    Exposed so the Gateway layer can share the Conductor's session
    registry instead of keeping its own.
    """
    manager = self._sessions
    return manager
|
eden_os/conductor/session_manager.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS — Conductor: Session Manager
|
| 3 |
+
Manages the lifecycle of conversation sessions.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
import time
|
| 9 |
+
import uuid
|
| 10 |
+
from dataclasses import dataclass, field
|
| 11 |
+
from enum import Enum
|
| 12 |
+
from typing import Any
|
| 13 |
+
|
| 14 |
+
from loguru import logger
|
| 15 |
+
|
| 16 |
+
from eden_os.shared.types import AvatarState, SessionConfig
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class SessionState(Enum):
    """Lifecycle states a conversation session moves through.

    Only CREATED and ENDED transitions are visible in this module
    (``SessionManager.create`` / ``SessionManager.destroy``); the
    remaining states are presumably driven by the orchestrator —
    TODO confirm against the Conductor's state transitions.
    """
    CREATED = "created"            # freshly registered by SessionManager.create
    INITIALIZING = "initializing"
    READY = "ready"
    ACTIVE = "active"
    PAUSED = "paused"
    ENDED = "ended"                # set by SessionManager.destroy on teardown
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
@dataclass
class Session:
    """Internal representation of a single conversation session."""
    session_id: str                          # unique id; a UUID4 string unless supplied by the caller
    config: SessionConfig                    # immutable-by-convention session configuration
    state: SessionState = SessionState.CREATED       # lifecycle state (see SessionState)
    avatar_state: AvatarState = AvatarState.IDLE     # current avatar behavior state
    engines: dict[str, Any] = field(default_factory=dict)            # engine references; cleared on destroy — keys presumably engine names, TODO confirm
    conversation_history: list[dict[str, str]] = field(default_factory=list)  # {"role", "content"} messages, capped at 40 by add_history
    metrics: dict[str, Any] = field(default_factory=dict)            # per-session metric scratch space
    created_at: float = field(default_factory=time.time)             # creation time, epoch seconds
    idle_cache: dict | None = None           # precomputed idle-animation data, if any
    portrait_profile: dict | None = None     # portrait/appearance profile, if any
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class SessionManager:
    """Registry of conversation sessions, keyed by UUID string.

    Usage::

        mgr = SessionManager()
        sid = mgr.create(config)
        session = mgr.get(sid)
        mgr.update_settings(sid, {"expressiveness": 0.9})
        mgr.destroy(sid)
    """

    def __init__(self) -> None:
        # session_id -> Session record
        self._sessions: dict[str, Session] = {}

    # ------------------------------------------------------------------
    # CRUD
    # ------------------------------------------------------------------

    def create(self, config: SessionConfig) -> str:
        """Register a new session and return its session_id.

        A non-empty ``config.session_id`` is honoured as-is; otherwise a
        fresh UUID4 is generated. Creating an id that already exists is a
        no-op that returns the existing id.
        """
        sid = config.session_id if config.session_id else str(uuid.uuid4())
        config.session_id = sid

        if sid in self._sessions:
            logger.warning("Session '{}' already exists — returning existing", sid)
            return sid

        self._sessions[sid] = Session(session_id=sid, config=config)
        logger.info("Session created: {}", sid)
        return sid

    def get(self, session_id: str) -> Session:
        """Look up a session by id; raises ``KeyError`` if unknown."""
        try:
            return self._sessions[session_id]
        except KeyError:
            raise KeyError(f"Session '{session_id}' not found") from None

    def exists(self, session_id: str) -> bool:
        """True when *session_id* is currently registered."""
        return session_id in self._sessions

    def update_settings(self, session_id: str, settings: dict) -> None:
        """Merge *settings* into the session's ``config.settings`` dict."""
        record = self.get(session_id)
        record.config.settings.update(settings)
        logger.info("Session '{}' settings updated: {}", session_id, list(settings.keys()))

    def set_state(self, session_id: str, state: SessionState) -> None:
        """Transition the session to *state*, logging the change."""
        record = self.get(session_id)
        previous = record.state
        record.state = state
        logger.debug("Session '{}' state: {} -> {}", session_id, previous.value, state.value)

    def add_history(self, session_id: str, role: str, content: str) -> None:
        """Append a message to conversation history (sliding window of 40)."""
        record = self.get(session_id)
        record.conversation_history.append({"role": role, "content": content})
        # Rebind (rather than mutate in place) to drop the oldest entries,
        # matching the original trimming semantics.
        if len(record.conversation_history) > 40:
            record.conversation_history = record.conversation_history[-40:]

    def destroy(self, session_id: str) -> None:
        """Remove a session and release its engine references."""
        record = self._sessions.pop(session_id, None)
        if record is None:
            logger.warning("destroy called for unknown session '{}'", session_id)
            return
        record.state = SessionState.ENDED
        record.engines.clear()
        logger.info("Session destroyed: {}", session_id)

    def list_sessions(self) -> list[str]:
        """Return the ids of all registered sessions."""
        return list(self._sessions.keys())
|
eden_os/gateway/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS -- Gateway (Agent 6)
|
| 3 |
+
WebSocket/WebRTC Server + REST API Layer.
|
| 4 |
+
|
| 5 |
+
Exports:
|
| 6 |
+
create_app -- factory function that builds and returns a FastAPI application.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from eden_os.gateway.api_server import create_app
|
| 10 |
+
|
| 11 |
+
__all__ = ["create_app"]
|
eden_os/gateway/api_server.py
ADDED
|
@@ -0,0 +1,521 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS -- Gateway API Server (Agent 6)
|
| 3 |
+
FastAPI application with all REST + WebSocket endpoints.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
import time
|
| 10 |
+
import uuid
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
from typing import Any, Optional
|
| 13 |
+
|
| 14 |
+
from fastapi import FastAPI, HTTPException, WebSocket, WebSocketDisconnect
|
| 15 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 16 |
+
from fastapi.staticfiles import StaticFiles
|
| 17 |
+
from loguru import logger
|
| 18 |
+
from pydantic import BaseModel, Field
|
| 19 |
+
|
| 20 |
+
from eden_os.gateway.websocket_handler import WebSocketHandler
|
| 21 |
+
|
| 22 |
+
# ---------------------------------------------------------------------------
|
| 23 |
+
# Pydantic request / response schemas
|
| 24 |
+
# ---------------------------------------------------------------------------
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class CreateSessionRequest(BaseModel):
    """Body of ``POST /api/v1/sessions``."""
    portrait_image: Optional[str] = Field(None, description="Base64-encoded portrait image")
    template: str = Field("default", description="Agent template name")


class CreateSessionResponse(BaseModel):
    """Response of ``POST /api/v1/sessions``; ws_url points at the stream endpoint."""
    session_id: str
    ws_url: str
    status: str = "ready"


class SessionStatusResponse(BaseModel):
    """Response of ``GET /api/v1/sessions/{id}/status``."""
    session_id: str
    state: str
    metrics: dict = Field(default_factory=dict)


class SettingsUpdateRequest(BaseModel):
    """Body of ``PUT /api/v1/sessions/{id}/settings`` — behavioral sliders.

    All fields optional; only the supplied (non-None) fields are applied.
    Values appear to be 0..1 floats — TODO confirm against the frontend.
    """
    expressiveness: Optional[float] = None
    eye_contact: Optional[float] = None
    voice_tone: Optional[float] = None
    consistency: Optional[float] = None
    latency: Optional[float] = None
    flirtation: Optional[float] = None


class SettingsUpdateResponse(BaseModel):
    """Response of the settings update endpoint."""
    applied: bool = True


class PipelineSwapRequest(BaseModel):
    """Body of ``PUT /api/v1/sessions/{id}/pipeline`` — swap models mid-session."""
    tts_engine: Optional[str] = None
    animation_engine: Optional[str] = None


class PipelineSwapResponse(BaseModel):
    """Response of the pipeline swap endpoint."""
    swapped: bool = True
    reload_time_ms: float = 0.0


class KnowledgeIngestRequest(BaseModel):
    """Body of ``POST /api/v1/knowledge/ingest``."""
    type: str = Field(..., description="youtube | audiobook | url")
    url: Optional[str] = None
    file: Optional[str] = None  # base64 for audiobook


class KnowledgeIngestResponse(BaseModel):
    """Response of the knowledge ingest endpoint (asynchronous job handle)."""
    job_id: str
    status: str = "processing"
    chunks_estimated: int = 0


class TemplateInfo(BaseModel):
    """One agent template, as listed by ``GET /api/v1/templates``."""
    name: str
    description: str = ""


class HealthResponse(BaseModel):
    """Response of ``GET /api/v1/health``."""
    status: str = "ok"
    gpu: dict = Field(default_factory=dict)
    active_sessions: int = 0
    uptime_seconds: float = 0.0
| 89 |
+
|
| 90 |
+
|
| 91 |
+
# ---------------------------------------------------------------------------
|
| 92 |
+
# Application state (held on the app instance via app.state)
|
| 93 |
+
# ---------------------------------------------------------------------------
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
class _AppState:
    """Mutable state attached to the FastAPI app."""

    def __init__(self, hardware_profile: str, models_cache: str):
        self.hardware_profile = hardware_profile
        self.models_cache = models_cache
        self.boot_time: float = time.time()
        # session_id -> session dict
        self.sessions: dict[str, dict[str, Any]] = {}
        # Heavy engines are lazy-initialized on first use so the gateway
        # can boot without the full model stack installed.
        self._conductor: Any = None
        self._scholar: Any = None
        # session_id -> live WebSocketHandler
        self.ws_handlers: dict[str, WebSocketHandler] = {}

    # ------------------------------------------------------------------
    @property
    def conductor(self) -> Any:
        """Lazy-load the Conductor so the gateway can start even if
        other engines are not yet installed; falls back to a stub."""
        if self._conductor is not None:
            return self._conductor
        try:
            from eden_os.conductor import Conductor  # type: ignore[import-untyped]

            instance = Conductor(
                hardware_profile=self.hardware_profile,
                models_cache=self.models_cache,
            )
            logger.info("Conductor lazy-initialized successfully.")
        except Exception as exc:
            # Import or construction failure -> degrade to the stub.
            logger.warning(
                f"Conductor not available (stub mode): {exc}"
            )
            instance = _StubConductor()
        self._conductor = instance
        return self._conductor

    @property
    def scholar(self) -> Any:
        """Lazy-load the ScholarEngine; stays ``None`` on failure, so the
        import is re-attempted on each subsequent access."""
        if self._scholar is not None:
            return self._scholar
        try:
            from eden_os.scholar import ScholarEngine

            self._scholar = ScholarEngine()
            logger.info("ScholarEngine lazy-initialized successfully.")
        except Exception as exc:
            logger.warning(
                f"ScholarEngine not available (stub mode): {exc}"
            )
            self._scholar = None
        return self._scholar
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
class _StubConductor:
|
| 150 |
+
"""Minimal stub so the API can respond even when the real Conductor
|
| 151 |
+
is not installed yet."""
|
| 152 |
+
|
| 153 |
+
async def create_session(self, config: Any) -> str:
|
| 154 |
+
return str(uuid.uuid4())
|
| 155 |
+
|
| 156 |
+
async def start_conversation(self, session_id: str) -> None:
|
| 157 |
+
pass
|
| 158 |
+
|
| 159 |
+
async def end_conversation(self, session_id: str) -> None:
|
| 160 |
+
pass
|
| 161 |
+
|
| 162 |
+
async def get_metrics(self, session_id: str) -> dict:
|
| 163 |
+
return {}
|
| 164 |
+
|
| 165 |
+
async def interrupt(self, session_id: str) -> None:
|
| 166 |
+
pass
|
| 167 |
+
|
| 168 |
+
async def update_settings(self, session_id: str, settings: dict) -> None:
|
| 169 |
+
pass
|
| 170 |
+
|
| 171 |
+
async def swap_pipeline(self, session_id: str, swap: dict) -> float:
|
| 172 |
+
return 0.0
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
# ---------------------------------------------------------------------------
|
| 176 |
+
# Template helpers
|
| 177 |
+
# ---------------------------------------------------------------------------
|
| 178 |
+
|
| 179 |
+
_TEMPLATES_DIR = Path(__file__).resolve().parents[2] / "templates"


def _list_templates() -> list[TemplateInfo]:
    """Enumerate agent templates by scanning the templates/ directory.

    Returns a single built-in "default" entry when the directory is
    missing or contains no YAML files.
    """
    entries: list[TemplateInfo] = []
    if _TEMPLATES_DIR.is_dir():
        entries = [
            TemplateInfo(name=path.stem, description=f"Template: {path.stem}")
            for path in sorted(_TEMPLATES_DIR.glob("*.yaml"))
        ]
    if not entries:
        entries = [TemplateInfo(name="default", description="Default EVE template")]
    return entries
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
# ---------------------------------------------------------------------------
|
| 196 |
+
# GPU info helper
|
| 197 |
+
# ---------------------------------------------------------------------------
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
def _gpu_info() -> dict:
|
| 201 |
+
"""Best-effort GPU info via torch.cuda."""
|
| 202 |
+
try:
|
| 203 |
+
import torch
|
| 204 |
+
|
| 205 |
+
if torch.cuda.is_available():
|
| 206 |
+
idx = torch.cuda.current_device()
|
| 207 |
+
return {
|
| 208 |
+
"name": torch.cuda.get_device_name(idx),
|
| 209 |
+
"memory_used_mb": round(
|
| 210 |
+
torch.cuda.memory_allocated(idx) / 1024 / 1024, 1
|
| 211 |
+
),
|
| 212 |
+
"memory_total_mb": round(
|
| 213 |
+
torch.cuda.get_device_properties(idx).total_mem / 1024 / 1024, 1
|
| 214 |
+
),
|
| 215 |
+
}
|
| 216 |
+
except Exception:
|
| 217 |
+
pass
|
| 218 |
+
return {"name": "none", "memory_used_mb": 0, "memory_total_mb": 0}
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
# ---------------------------------------------------------------------------
|
| 222 |
+
# Factory
|
| 223 |
+
# ---------------------------------------------------------------------------
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
def create_app(
|
| 227 |
+
host: str = "0.0.0.0",
|
| 228 |
+
port: int = 7860,
|
| 229 |
+
hardware_profile: str = "auto",
|
| 230 |
+
models_cache: str = "models_cache",
|
| 231 |
+
) -> FastAPI:
|
| 232 |
+
"""Build and return the Gateway FastAPI application.
|
| 233 |
+
|
| 234 |
+
Parameters
|
| 235 |
+
----------
|
| 236 |
+
host:
|
| 237 |
+
Bind address (informational — used when constructing ws_url).
|
| 238 |
+
port:
|
| 239 |
+
Bind port (informational — used when constructing ws_url).
|
| 240 |
+
hardware_profile:
|
| 241 |
+
Hardware tier string forwarded to the Conductor.
|
| 242 |
+
models_cache:
|
| 243 |
+
Path to the downloaded model weights directory.
|
| 244 |
+
"""
|
| 245 |
+
|
| 246 |
+
app = FastAPI(
|
| 247 |
+
title="EDEN OS Gateway",
|
| 248 |
+
version="1.0.0",
|
| 249 |
+
description="4D Bi-Directional Conversational Avatar API",
|
| 250 |
+
)
|
| 251 |
+
|
| 252 |
+
# -- CORS (allow all for dev) -----------------------------------------
|
| 253 |
+
app.add_middleware(
|
| 254 |
+
CORSMiddleware,
|
| 255 |
+
allow_origins=["*"],
|
| 256 |
+
allow_credentials=True,
|
| 257 |
+
allow_methods=["*"],
|
| 258 |
+
allow_headers=["*"],
|
| 259 |
+
)
|
| 260 |
+
|
| 261 |
+
# -- Shared state -----------------------------------------------------
|
| 262 |
+
state = _AppState(hardware_profile=hardware_profile, models_cache=models_cache)
|
| 263 |
+
app.state.eden = state
|
| 264 |
+
# Store host/port for ws_url construction
|
| 265 |
+
app.state.host = host
|
| 266 |
+
app.state.port = port
|
| 267 |
+
|
| 268 |
+
# -- Static files (EDEN Studio frontend) ------------------------------
|
| 269 |
+
static_dir = Path(__file__).resolve().parents[2] / "static"
|
| 270 |
+
_static_dir = static_dir # capture for closure
|
| 271 |
+
|
| 272 |
+
# ======================================================================
|
| 273 |
+
# REST Endpoints
|
| 274 |
+
# ======================================================================
|
| 275 |
+
|
| 276 |
+
@app.post("/api/v1/sessions", response_model=CreateSessionResponse)
|
| 277 |
+
async def create_session(req: CreateSessionRequest) -> CreateSessionResponse:
|
| 278 |
+
"""Create a new conversation session."""
|
| 279 |
+
session_id = str(uuid.uuid4())
|
| 280 |
+
ws_url = f"ws://{app.state.host}:{app.state.port}/api/v1/sessions/{session_id}/stream"
|
| 281 |
+
|
| 282 |
+
state.sessions[session_id] = {
|
| 283 |
+
"id": session_id,
|
| 284 |
+
"template": req.template,
|
| 285 |
+
"state": "ready",
|
| 286 |
+
"settings": {
|
| 287 |
+
"expressiveness": 0.6,
|
| 288 |
+
"eye_contact": 0.5,
|
| 289 |
+
"voice_tone": 0.85,
|
| 290 |
+
"consistency": 0.7,
|
| 291 |
+
"latency": 1.0,
|
| 292 |
+
"flirtation": 0.15,
|
| 293 |
+
},
|
| 294 |
+
"created_at": time.time(),
|
| 295 |
+
}
|
| 296 |
+
|
| 297 |
+
# Delegate to the Conductor (lazy-loaded)
|
| 298 |
+
try:
|
| 299 |
+
from eden_os.shared.types import SessionConfig
|
| 300 |
+
import numpy as np
|
| 301 |
+
import base64
|
| 302 |
+
|
| 303 |
+
# Decode the portrait image from base64 to numpy (best-effort)
|
| 304 |
+
portrait_array = None
|
| 305 |
+
if req.portrait_image:
|
| 306 |
+
try:
|
| 307 |
+
raw = base64.b64decode(req.portrait_image)
|
| 308 |
+
portrait_array = np.frombuffer(raw, dtype=np.uint8)
|
| 309 |
+
except Exception:
|
| 310 |
+
portrait_array = None
|
| 311 |
+
|
| 312 |
+
config = SessionConfig(
|
| 313 |
+
session_id=session_id,
|
| 314 |
+
portrait_image=portrait_array,
|
| 315 |
+
template_name=req.template,
|
| 316 |
+
hardware_profile=state.hardware_profile,
|
| 317 |
+
)
|
| 318 |
+
await state.conductor.create_session(config)
|
| 319 |
+
except Exception as exc:
|
| 320 |
+
logger.warning(f"Conductor session creation deferred: {exc}")
|
| 321 |
+
|
| 322 |
+
logger.info(f"Session created: {session_id} (template={req.template})")
|
| 323 |
+
return CreateSessionResponse(
|
| 324 |
+
session_id=session_id, ws_url=ws_url, status="ready"
|
| 325 |
+
)
|
| 326 |
+
|
| 327 |
+
@app.delete("/api/v1/sessions/{session_id}")
|
| 328 |
+
async def delete_session(session_id: str) -> dict:
|
| 329 |
+
"""End and clean up a session."""
|
| 330 |
+
if session_id not in state.sessions:
|
| 331 |
+
raise HTTPException(status_code=404, detail="Session not found")
|
| 332 |
+
|
| 333 |
+
# Clean up WebSocket handler if active
|
| 334 |
+
handler = state.ws_handlers.pop(session_id, None)
|
| 335 |
+
if handler is not None:
|
| 336 |
+
await handler.close()
|
| 337 |
+
|
| 338 |
+
try:
|
| 339 |
+
await state.conductor.end_conversation(session_id)
|
| 340 |
+
except Exception as exc:
|
| 341 |
+
logger.warning(f"Conductor cleanup warning: {exc}")
|
| 342 |
+
|
| 343 |
+
del state.sessions[session_id]
|
| 344 |
+
logger.info(f"Session deleted: {session_id}")
|
| 345 |
+
return {"deleted": True, "session_id": session_id}
|
| 346 |
+
|
| 347 |
+
@app.get(
|
| 348 |
+
"/api/v1/sessions/{session_id}/status",
|
| 349 |
+
response_model=SessionStatusResponse,
|
| 350 |
+
)
|
| 351 |
+
async def session_status(session_id: str) -> SessionStatusResponse:
|
| 352 |
+
"""Return current session state and pipeline metrics."""
|
| 353 |
+
if session_id not in state.sessions:
|
| 354 |
+
raise HTTPException(status_code=404, detail="Session not found")
|
| 355 |
+
|
| 356 |
+
session = state.sessions[session_id]
|
| 357 |
+
metrics: dict = {}
|
| 358 |
+
try:
|
| 359 |
+
raw_metrics = await state.conductor.get_metrics(session_id)
|
| 360 |
+
if hasattr(raw_metrics, "__dict__"):
|
| 361 |
+
metrics = {
|
| 362 |
+
k: v
|
| 363 |
+
for k, v in raw_metrics.__dict__.items()
|
| 364 |
+
if not k.startswith("_")
|
| 365 |
+
}
|
| 366 |
+
elif isinstance(raw_metrics, dict):
|
| 367 |
+
metrics = raw_metrics
|
| 368 |
+
except Exception:
|
| 369 |
+
pass
|
| 370 |
+
|
| 371 |
+
return SessionStatusResponse(
|
| 372 |
+
session_id=session_id,
|
| 373 |
+
state=session.get("state", "unknown"),
|
| 374 |
+
metrics=metrics,
|
| 375 |
+
)
|
| 376 |
+
|
| 377 |
+
@app.post("/api/v1/sessions/{session_id}/interrupt")
|
| 378 |
+
async def interrupt_session(session_id: str) -> dict:
|
| 379 |
+
"""Force-interrupt the avatar (stop speaking, return to LISTENING)."""
|
| 380 |
+
if session_id not in state.sessions:
|
| 381 |
+
raise HTTPException(status_code=404, detail="Session not found")
|
| 382 |
+
|
| 383 |
+
state.sessions[session_id]["state"] = "listening"
|
| 384 |
+
try:
|
| 385 |
+
await state.conductor.interrupt(session_id)
|
| 386 |
+
except Exception as exc:
|
| 387 |
+
logger.warning(f"Conductor interrupt warning: {exc}")
|
| 388 |
+
|
| 389 |
+
# Notify WebSocket handler
|
| 390 |
+
handler = state.ws_handlers.get(session_id)
|
| 391 |
+
if handler is not None:
|
| 392 |
+
await handler.handle_interrupt()
|
| 393 |
+
|
| 394 |
+
logger.info(f"Session interrupted: {session_id}")
|
| 395 |
+
return {"interrupted": True}
|
| 396 |
+
|
| 397 |
+
@app.put(
|
| 398 |
+
"/api/v1/sessions/{session_id}/settings",
|
| 399 |
+
response_model=SettingsUpdateResponse,
|
| 400 |
+
)
|
| 401 |
+
async def update_settings(
|
| 402 |
+
session_id: str, req: SettingsUpdateRequest
|
| 403 |
+
) -> SettingsUpdateResponse:
|
| 404 |
+
"""Update behavioral sliders in real-time."""
|
| 405 |
+
if session_id not in state.sessions:
|
| 406 |
+
raise HTTPException(status_code=404, detail="Session not found")
|
| 407 |
+
|
| 408 |
+
updates = req.model_dump(exclude_none=True)
|
| 409 |
+
state.sessions[session_id]["settings"].update(updates)
|
| 410 |
+
|
| 411 |
+
try:
|
| 412 |
+
await state.conductor.update_settings(session_id, updates)
|
| 413 |
+
except Exception as exc:
|
| 414 |
+
logger.warning(f"Conductor settings update warning: {exc}")
|
| 415 |
+
|
| 416 |
+
logger.info(f"Settings updated for {session_id}: {updates}")
|
| 417 |
+
return SettingsUpdateResponse(applied=True)
|
| 418 |
+
|
| 419 |
+
@app.put(
|
| 420 |
+
"/api/v1/sessions/{session_id}/pipeline",
|
| 421 |
+
response_model=PipelineSwapResponse,
|
| 422 |
+
)
|
| 423 |
+
async def swap_pipeline(
|
| 424 |
+
session_id: str, req: PipelineSwapRequest
|
| 425 |
+
) -> PipelineSwapResponse:
|
| 426 |
+
"""Swap models mid-session without restart."""
|
| 427 |
+
if session_id not in state.sessions:
|
| 428 |
+
raise HTTPException(status_code=404, detail="Session not found")
|
| 429 |
+
|
| 430 |
+
swap_config = req.model_dump(exclude_none=True)
|
| 431 |
+
reload_ms = 0.0
|
| 432 |
+
try:
|
| 433 |
+
reload_ms = await state.conductor.swap_pipeline(session_id, swap_config)
|
| 434 |
+
except Exception as exc:
|
| 435 |
+
logger.warning(f"Pipeline swap warning: {exc}")
|
| 436 |
+
|
| 437 |
+
logger.info(f"Pipeline swapped for {session_id}: {swap_config}")
|
| 438 |
+
return PipelineSwapResponse(swapped=True, reload_time_ms=reload_ms)
|
| 439 |
+
|
| 440 |
+
@app.get("/api/v1/templates", response_model=list[TemplateInfo])
|
| 441 |
+
async def list_templates() -> list[TemplateInfo]:
|
| 442 |
+
"""List available agent templates."""
|
| 443 |
+
return _list_templates()
|
| 444 |
+
|
| 445 |
+
@app.post(
    "/api/v1/knowledge/ingest", response_model=KnowledgeIngestResponse
)
async def ingest_knowledge(
    req: KnowledgeIngestRequest,
) -> KnowledgeIngestResponse:
    """Ingest YouTube / audiobook / URL into the knowledge base.

    Queues the source with the Scholar subsystem when it is available;
    otherwise (or on queue failure) falls back to a locally generated
    job id so the client always receives a usable handle.
    """
    source = req.url or req.file or ""
    job_id = str(uuid.uuid4())  # local fallback id

    scholar = state.scholar
    if scholar is not None and hasattr(scholar, "queue_job"):
        try:
            # Prefer the Scholar-assigned job id over the local fallback.
            job_id = scholar.queue_job(req.type, source)
        except Exception as exc:
            logger.warning(f"Scholar queue_job failed: {exc}")

    logger.info(f"Knowledge ingest queued: type={req.type}, job_id={job_id}")
    return KnowledgeIngestResponse(
        job_id=job_id, status="processing", chunks_estimated=0
    )
|
| 466 |
+
|
| 467 |
+
@app.get("/api/v1/health", response_model=HealthResponse)
async def health() -> HealthResponse:
    """Report system health: GPU info, active session count, and uptime."""
    uptime = round(time.time() - state.boot_time, 1)
    return HealthResponse(
        status="ok",
        gpu=_gpu_info(),
        active_sessions=len(state.sessions),
        uptime_seconds=uptime,
    )
|
| 476 |
+
|
| 477 |
+
# ======================================================================
|
| 478 |
+
# WebSocket endpoint
|
| 479 |
+
# ======================================================================
|
| 480 |
+
|
| 481 |
+
@app.websocket("/api/v1/sessions/{session_id}/stream")
async def websocket_stream(websocket: WebSocket, session_id: str) -> None:
    """Bi-directional audio/video streaming over WebSocket.

    Rejects unknown sessions with close code 4004 *before* accepting the
    connection, then hands the socket to a WebSocketHandler whose run()
    drives the streaming loops until disconnect or error.
    """
    if session_id not in state.sessions:
        await websocket.close(code=4004, reason="Session not found")
        return

    await websocket.accept()
    logger.info(f"WebSocket connected: {session_id}")

    handler = WebSocketHandler(
        session_id=session_id,
        websocket=websocket,
        conductor=state.conductor,
    )
    # Register so the rest of the app can reach this session's handler.
    state.ws_handlers[session_id] = handler

    try:
        await handler.run()
    except WebSocketDisconnect:
        logger.info(f"WebSocket disconnected: {session_id}")
    except Exception as exc:
        logger.error(f"WebSocket error ({session_id}): {exc}")
    finally:
        # Always deregister — even on error — so stale handlers don't linger.
        state.ws_handlers.pop(session_id, None)
        logger.info(f"WebSocket handler removed: {session_id}")
|
| 507 |
+
|
| 508 |
+
# -- Mount static files + root route (MUST be after all API routes) ------
if _static_dir.is_dir():
    from fastapi.responses import FileResponse

    # Root route serves the frontend entry point directly.
    @app.get("/", include_in_schema=False)
    async def serve_frontend():
        return FileResponse(str(_static_dir / "index.html"))

    app.mount("/static", StaticFiles(directory=str(_static_dir), html=True), name="static")
    logger.info(f"Static files mounted from {_static_dir}")
else:
    # Non-fatal: the API still works without a bundled frontend.
    logger.warning(f"Static directory not found at {_static_dir} — skipping mount.")

return app
|
eden_os/gateway/audio_capture.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS -- Audio Capture & Processing (Agent 6)
|
| 3 |
+
Decodes incoming base64 PCM audio, applies noise gate, resamples to 16kHz.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
import base64
|
| 9 |
+
from typing import Optional
|
| 10 |
+
|
| 11 |
+
import numpy as np
|
| 12 |
+
from loguru import logger
|
| 13 |
+
|
| 14 |
+
from eden_os.shared.types import AudioChunk
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class AudioCapture:
    """Processes raw base64-encoded PCM audio from WebSocket clients.

    Pipeline:
    1. Decode base64 -> raw bytes -> numpy array of raw samples
    2. Normalise to float32 [-1, 1]
    3. Apply noise gate (threshold-based)
    4. Resample to target_sr if the source sample-rate differs
    5. Return an AudioChunk or None (if gated out)
    """

    def __init__(
        self,
        target_sr: int = 16_000,
        noise_gate_threshold: float = 0.01,
        source_sr: int = 48_000,
        dtype: str = "int16",
    ) -> None:
        """
        Parameters
        ----------
        target_sr:
            Output sample rate (default 16 kHz for ASR models).
        noise_gate_threshold:
            RMS threshold below which audio is considered silence.
            Clamped to [0.0, 1.0] relative to float32 normalised amplitude.
        source_sr:
            Expected sample rate of the incoming PCM data.
            Common values: 48000 (browser default), 44100, 16000.
        dtype:
            Numpy dtype of the incoming raw PCM samples.
        """
        self.target_sr = target_sr
        # Clamp at construction time so __init__ and set_noise_gate()
        # enforce the same [0, 1] invariant (previously only the setter
        # clamped, so an out-of-range constructor argument slipped through).
        self.noise_gate_threshold = max(0.0, min(1.0, noise_gate_threshold))
        self.source_sr = source_sr
        self.dtype = dtype

    # ------------------------------------------------------------------
    # Public
    # ------------------------------------------------------------------

    def process(
        self,
        b64_pcm: str,
        source_sr: int | None = None,
        is_final: bool = False,
    ) -> Optional[AudioChunk]:
        """Decode, gate, resample, and return an AudioChunk.

        Parameters
        ----------
        b64_pcm:
            Base64-encoded raw PCM samples in the instance's ``dtype``.
        source_sr:
            Per-call override of the source sample rate; falls back to
            ``self.source_sr`` when omitted.
        is_final:
            Passed through to the resulting AudioChunk.

        Returns None if decoding fails, the payload is empty, or the
        audio is below the noise gate.
        """
        sr = source_sr or self.source_sr

        # 1. Decode base64 -> numpy array of raw samples
        try:
            raw_bytes = base64.b64decode(b64_pcm)
            samples = np.frombuffer(raw_bytes, dtype=self.dtype)
        except Exception as exc:
            logger.warning(f"AudioCapture: failed to decode PCM: {exc}")
            return None

        if samples.size == 0:
            return None

        # 2. Normalise to float32 [-1, 1] (astype copies, so the
        #    read-only frombuffer view is never mutated in place)
        audio = samples.astype(np.float32)
        if self.dtype == "int16":
            audio /= 32768.0
        elif self.dtype == "int32":
            audio /= 2147483648.0
        # float32 input is already in [-1, 1]

        # 3. Noise gate — drop chunks whose RMS falls below the threshold
        rms = float(np.sqrt(np.mean(audio ** 2)))
        if rms < self.noise_gate_threshold:
            return None

        # 4. Resample to target_sr if needed
        if sr != self.target_sr:
            audio = self._resample(audio, sr, self.target_sr)

        # 5. Build AudioChunk (duration computed at the *output* rate)
        duration_ms = (len(audio) / self.target_sr) * 1000.0
        return AudioChunk(
            data=audio,
            sample_rate=self.target_sr,
            duration_ms=duration_ms,
            is_final=is_final,
        )

    # ------------------------------------------------------------------
    # Resampling
    # ------------------------------------------------------------------

    @staticmethod
    def _resample(
        audio: np.ndarray, orig_sr: int, target_sr: int
    ) -> np.ndarray:
        """Resample ``audio`` from ``orig_sr`` to ``target_sr``.

        Uses scipy.signal.resample_poly (polyphase filtering) when scipy
        is importable; otherwise falls back to numpy linear
        interpolation, which is lower quality but dependency-free.
        """
        if orig_sr == target_sr:
            return audio

        try:
            from scipy.signal import resample_poly
            from math import gcd

            g = gcd(orig_sr, target_sr)
            up = target_sr // g
            down = orig_sr // g
            return resample_poly(audio, up, down).astype(np.float32)
        except ImportError:
            # Fallback: numpy linear interpolation
            ratio = target_sr / orig_sr
            new_length = int(len(audio) * ratio)
            indices = np.linspace(0, len(audio) - 1, new_length)
            return np.interp(indices, np.arange(len(audio)), audio).astype(
                np.float32
            )

    # ------------------------------------------------------------------
    # Configuration helpers
    # ------------------------------------------------------------------

    def set_noise_gate(self, threshold: float) -> None:
        """Update the noise gate threshold at runtime (clamped to [0, 1])."""
        self.noise_gate_threshold = max(0.0, min(1.0, threshold))
        logger.debug(f"AudioCapture noise gate set to {self.noise_gate_threshold}")

    def set_source_sample_rate(self, sr: int) -> None:
        """Update the expected source sample rate."""
        self.source_sr = sr
        logger.debug(f"AudioCapture source SR set to {sr}")
|
eden_os/gateway/video_encoder.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS -- Video Encoder (Agent 6)
|
| 3 |
+
Encodes numpy RGB frames to base64 JPEG/PNG for WebSocket transport.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
import base64
|
| 9 |
+
import io
|
| 10 |
+
from typing import Literal
|
| 11 |
+
|
| 12 |
+
import numpy as np
|
| 13 |
+
from loguru import logger
|
| 14 |
+
|
| 15 |
+
try:
|
| 16 |
+
from PIL import Image
|
| 17 |
+
|
| 18 |
+
_PIL_AVAILABLE = True
|
| 19 |
+
except ImportError: # pragma: no cover
|
| 20 |
+
_PIL_AVAILABLE = False
|
| 21 |
+
|
| 22 |
+
try:
|
| 23 |
+
import cv2
|
| 24 |
+
|
| 25 |
+
_CV2_AVAILABLE = True
|
| 26 |
+
except ImportError: # pragma: no cover
|
| 27 |
+
_CV2_AVAILABLE = False
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class VideoEncoder:
    """Turns raw numpy RGB frames into base64 strings for WebSocket delivery.

    JPEG is the default (small, lossy); PNG is the lossless alternative.
    OpenCV is preferred when installed because it is faster; Pillow is
    the fallback. With neither library, encoding yields empty strings.
    """

    def __init__(
        self,
        fmt: Literal["jpeg", "png"] = "jpeg",
        quality: int = 80,
    ) -> None:
        """
        Parameters
        ----------
        fmt:
            Output image format: "jpeg" (lossy, smaller) or "png" (lossless).
        quality:
            JPEG quality, clamped to 1-100; ignored for PNG.
        """
        self.fmt = fmt.lower()
        self.quality = min(100, max(1, quality))

        if not (_CV2_AVAILABLE or _PIL_AVAILABLE):
            logger.warning(
                "VideoEncoder: neither cv2 nor Pillow available — "
                "encode_frame will return empty strings."
            )

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def encode_frame(self, pixels: np.ndarray) -> str:
        """Encode one (H, W, 3) uint8 RGB frame as a base64 string.

        An empty string signals failure (or a missing image library).
        """
        if pixels is None or pixels.size == 0:
            return ""

        if pixels.dtype != np.uint8:
            # Clamp then cast so out-of-range values don't wrap around.
            pixels = np.clip(pixels, 0, 255).astype(np.uint8)

        if _CV2_AVAILABLE:
            return self._encode_cv2(pixels)
        if _PIL_AVAILABLE:
            return self._encode_pil(pixels)

        logger.error("VideoEncoder: no image library available.")
        return ""

    def encode_batch(self, frames: list[np.ndarray]) -> list[str]:
        """Encode each frame in ``frames``; one base64 string per frame."""
        return [self.encode_frame(frame) for frame in frames]

    # ------------------------------------------------------------------
    # Configuration
    # ------------------------------------------------------------------

    def set_quality(self, quality: int) -> None:
        """Update JPEG quality at runtime (clamped to 1-100)."""
        self.quality = min(100, max(1, quality))
        logger.debug(f"VideoEncoder quality set to {self.quality}")

    def set_format(self, fmt: Literal["jpeg", "png"]) -> None:
        """Switch between jpeg and png encoding."""
        self.fmt = fmt.lower()
        logger.debug(f"VideoEncoder format set to {self.fmt}")

    # ------------------------------------------------------------------
    # Internal encoders
    # ------------------------------------------------------------------

    def _encode_cv2(self, rgb: np.ndarray) -> str:
        """OpenCV encoding path (fastest)."""
        try:
            # OpenCV works in BGR channel order, so convert first.
            bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

            if self.fmt == "jpeg":
                ok, encoded = cv2.imencode(
                    ".jpg", bgr, [cv2.IMWRITE_JPEG_QUALITY, self.quality]
                )
            else:
                # PNG compression is 0-9; 3 balances speed and size.
                ok, encoded = cv2.imencode(
                    ".png", bgr, [cv2.IMWRITE_PNG_COMPRESSION, 3]
                )

            if not ok:
                logger.error("VideoEncoder: cv2.imencode failed.")
                return ""

            return base64.b64encode(encoded.tobytes()).decode("ascii")
        except Exception as exc:
            logger.error(f"VideoEncoder cv2 error: {exc}")
            return ""

    def _encode_pil(self, rgb: np.ndarray) -> str:
        """Pillow encoding path (fallback)."""
        try:
            buffer = io.BytesIO()
            image = Image.fromarray(rgb, mode="RGB")

            if self.fmt == "jpeg":
                image.save(buffer, format="JPEG", quality=self.quality)
            else:
                image.save(buffer, format="PNG")

            return base64.b64encode(buffer.getvalue()).decode("ascii")
        except Exception as exc:
            logger.error(f"VideoEncoder PIL error: {exc}")
            return ""
|
eden_os/gateway/webrtc_handler.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS -- WebRTC Handler Stub (Agent 6)
|
| 3 |
+
Placeholder for future WebRTC peer-connection support.
|
| 4 |
+
|
| 5 |
+
Currently all real-time streaming goes through the WebSocket fallback.
|
| 6 |
+
This module logs stubs so the interface is in place for when aiortc
|
| 7 |
+
integration is added.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from __future__ import annotations
|
| 11 |
+
|
| 12 |
+
from typing import Any, Optional
|
| 13 |
+
|
| 14 |
+
from loguru import logger
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class WebRTCHandler:
    """Stub WebRTC transport for Phase One.

    Every method logs that WebRTC is not yet wired up and returns a
    harmless placeholder value. Real-time streaming currently runs over
    the WebSocket path in ``websocket_handler.py``; this class only
    reserves the interface for a future aiortc integration.
    """

    def __init__(self, session_id: str | None = None) -> None:
        self.session_id = session_id
        # Placeholders for the future peer connection and media tracks.
        self._peer_connection: Any = None
        self._local_tracks: list[Any] = []
        self._remote_tracks: list[Any] = []
        logger.info(
            f"WebRTCHandler created for session {session_id} — "
            "WebRTC is NOT yet active; using WebSocket fallback."
        )

    # ------------------------------------------------------------------
    # Signaling
    # ------------------------------------------------------------------

    async def create_offer(self) -> dict:
        """Produce a stub SDP offer (``sdp``/``type`` keys, marked inactive)."""
        logger.warning(
            "WebRTCHandler.create_offer() called but WebRTC is not yet "
            "implemented. Returning stub offer."
        )
        return {
            "sdp": "",
            "type": "offer",
            "active": False,
            "message": "WebRTC not yet active. Use WebSocket streaming.",
        }

    async def handle_answer(self, answer: dict) -> bool:
        """Receive (and ignore) a remote SDP answer; always returns False.

        Parameters
        ----------
        answer:
            Dict with ``sdp`` and ``type`` keys from the remote client.
        """
        logger.warning(
            "WebRTCHandler.handle_answer() called but WebRTC is not yet "
            "implemented. Answer ignored."
        )
        return False

    async def add_ice_candidate(self, candidate: dict) -> bool:
        """Receive (and ignore) a trickle ICE candidate; always returns False.

        Parameters
        ----------
        candidate:
            ICE candidate dict from the remote peer.
        """
        logger.warning(
            "WebRTCHandler.add_ice_candidate() called but WebRTC is not "
            "yet implemented. Candidate ignored."
        )
        return False

    # ------------------------------------------------------------------
    # Track management
    # ------------------------------------------------------------------

    async def add_track(
        self,
        track: Any,
        kind: str = "video",
    ) -> None:
        """Register a local media track (no-op stub).

        Parameters
        ----------
        track:
            A media track object (e.g. aiortc MediaStreamTrack).
        kind:
            ``"audio"`` or ``"video"``.
        """
        logger.warning(
            f"WebRTCHandler.add_track(kind={kind}) called but WebRTC is "
            "not yet implemented. Track not added."
        )

    async def remove_track(self, track: Any) -> None:
        """Unregister a local media track (no-op stub)."""
        logger.warning(
            "WebRTCHandler.remove_track() called but WebRTC is not yet "
            "implemented."
        )

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    async def close(self) -> None:
        """Reset placeholder state; there is no live connection to tear down."""
        logger.info(
            f"WebRTCHandler.close() for session {self.session_id} — "
            "no active connection to close."
        )
        self._peer_connection = None
        self._local_tracks.clear()
        self._remote_tracks.clear()

    @property
    def is_active(self) -> bool:
        """Whether a WebRTC connection is currently active (always False)."""
        return False
|
eden_os/gateway/websocket_handler.py
ADDED
|
@@ -0,0 +1,308 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS -- WebSocket Streaming Handler (Agent 6)
|
| 3 |
+
Bi-directional audio/video streaming over WebSocket.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
import asyncio
|
| 9 |
+
import base64
|
| 10 |
+
import json
|
| 11 |
+
from typing import Any
|
| 12 |
+
|
| 13 |
+
from fastapi import WebSocket
|
| 14 |
+
from loguru import logger
|
| 15 |
+
|
| 16 |
+
from eden_os.gateway.audio_capture import AudioCapture
|
| 17 |
+
from eden_os.gateway.video_encoder import VideoEncoder
|
| 18 |
+
from eden_os.shared.types import AvatarState
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class WebSocketHandler:
|
| 22 |
+
"""Manages a single bi-directional WebSocket session.
|
| 23 |
+
|
| 24 |
+
Receives:
|
| 25 |
+
{"type": "audio", "data": "<base64_pcm>"}
|
| 26 |
+
{"type": "text", "content": "hello"}
|
| 27 |
+
{"type": "interrupt"}
|
| 28 |
+
|
| 29 |
+
Sends:
|
| 30 |
+
{"type": "video_frame", "data": "<base64>"}
|
| 31 |
+
{"type": "audio", "data": "<base64_wav>"}
|
| 32 |
+
{"type": "transcript", "text": "..."}
|
| 33 |
+
{"type": "state", "value": "speaking"}
|
| 34 |
+
"""
|
| 35 |
+
|
| 36 |
+
def __init__(
    self,
    session_id: str,
    websocket: WebSocket,
    conductor: Any,
    send_queue_size: int = 256,
    recv_queue_size: int = 256,
) -> None:
    """Set up per-session streaming state.

    Parameters
    ----------
    session_id:
        Identifier of the session this handler serves.
    websocket:
        The FastAPI WebSocket connection for this session.
    conductor:
        Orchestrator object; duck-typed (its methods are looked up with
        ``hasattr`` before use elsewhere in this class).
    send_queue_size / recv_queue_size:
        Bounds for the outbound/inbound message queues.
    """
    self.session_id = session_id
    self.ws = websocket
    self.conductor = conductor

    # Bounded queues decouple socket I/O from message processing.
    self._send_queue: asyncio.Queue[dict] = asyncio.Queue(
        maxsize=send_queue_size
    )
    self._recv_queue: asyncio.Queue[dict] = asyncio.Queue(
        maxsize=recv_queue_size
    )
    # _running gates all three internal loops; flipped off on close/error.
    self._running = False
    self._current_state = AvatarState.IDLE

    # Per-session audio decode + video encode helpers.
    self.audio_capture = AudioCapture()
    self.video_encoder = VideoEncoder()
|
| 59 |
+
|
| 60 |
+
# ------------------------------------------------------------------
|
| 61 |
+
# Public API
|
| 62 |
+
# ------------------------------------------------------------------
|
| 63 |
+
|
| 64 |
+
async def run(self) -> None:
    """Main loop — runs until the connection closes.

    Spawns the receive, send, and process loops concurrently. Because
    asyncio.wait uses FIRST_COMPLETED, any one loop finishing (socket
    closed, fatal error) tears the whole session down: the remaining
    tasks are cancelled, and any exception from the finished task is
    re-raised to the caller.
    """
    self._running = True
    logger.debug(f"[{self.session_id}] WebSocket handler starting.")

    # Run the receive and send loops concurrently
    recv_task = asyncio.create_task(self._receive_loop())
    send_task = asyncio.create_task(self._send_loop())
    process_task = asyncio.create_task(self._process_loop())

    try:
        done, pending = await asyncio.wait(
            [recv_task, send_task, process_task],
            return_when=asyncio.FIRST_COMPLETED,
        )
        # Cancel remaining tasks; CancelledError is listed explicitly
        # because it is a BaseException (Python 3.8+) and would escape
        # a plain `except Exception`.
        for task in pending:
            task.cancel()
            try:
                await task
            except (asyncio.CancelledError, Exception):
                pass
        # Re-raise any exception from the completed tasks
        for task in done:
            if task.exception() is not None:
                raise task.exception()  # type: ignore[misc]
    finally:
        # Stop the loops even if re-raising, so nothing keeps spinning.
        self._running = False
        logger.debug(f"[{self.session_id}] WebSocket handler stopped.")
|
| 93 |
+
|
| 94 |
+
async def close(self) -> None:
    """Signal the handler to stop and close the WebSocket.

    Closing an already-closed socket raises; that error is deliberately
    swallowed since the desired end state (closed) is already reached.
    """
    self._running = False
    try:
        await self.ws.close()
    except Exception:
        # Socket may already be closed — nothing more to do.
        pass
|
| 101 |
+
|
| 102 |
+
async def handle_interrupt(self) -> None:
    """Handle an interrupt triggered externally (e.g. REST endpoint).

    Switches the avatar state to LISTENING first, then asks the
    Conductor to abort in-flight work. Conductor failures are logged
    as warnings and never propagated to the caller.
    """
    logger.info(f"[{self.session_id}] External interrupt received.")
    await self._set_state(AvatarState.LISTENING)
    try:
        await self.conductor.interrupt(self.session_id)
    except Exception as exc:
        logger.warning(f"[{self.session_id}] Conductor interrupt error: {exc}")
|
| 110 |
+
|
| 111 |
+
# ------------------------------------------------------------------
|
| 112 |
+
# Send helpers
|
| 113 |
+
# ------------------------------------------------------------------
|
| 114 |
+
|
| 115 |
+
async def send_video_frame(self, frame_b64: str) -> None:
    """Queue a base64-encoded video frame for delivery to the client."""
    await self._enqueue_send({"type": "video_frame", "data": frame_b64})

async def send_audio(self, audio_b64: str) -> None:
    """Queue a base64-encoded audio chunk for delivery to the client."""
    await self._enqueue_send({"type": "audio", "data": audio_b64})

async def send_transcript(self, text: str) -> None:
    """Queue a transcript message for delivery to the client."""
    await self._enqueue_send({"type": "transcript", "text": text})

async def send_state(self, state: AvatarState) -> None:
    """Queue a state-change notification (sends the enum's value)."""
    await self._enqueue_send({"type": "state", "value": state.value})
|
| 130 |
+
|
| 131 |
+
# ------------------------------------------------------------------
|
| 132 |
+
# Internal loops
|
| 133 |
+
# ------------------------------------------------------------------
|
| 134 |
+
|
| 135 |
+
async def _receive_loop(self) -> None:
    """Read messages from the WebSocket and push them onto ``_recv_queue``.

    Stops the handler when the socket closes or the transport errors.
    A single malformed JSON payload is logged and skipped instead of
    tearing down the whole session (previously any decode error was
    conflated with a disconnect and killed the connection).
    """
    while self._running:
        try:
            raw = await self.ws.receive_text()
        except Exception:
            # WebSocket closed or transport error — stop the handler.
            self._running = False
            break

        try:
            msg = json.loads(raw)
        except json.JSONDecodeError:
            # Bad payload from the client: skip it, keep the session alive.
            logger.warning(
                f"[{self.session_id}] Malformed JSON message, skipping."
            )
            continue

        try:
            self._recv_queue.put_nowait(msg)
        except asyncio.QueueFull:
            # Backpressure policy: drop rather than block the socket reader.
            logger.warning(
                f"[{self.session_id}] Receive queue full, dropping message."
            )
|
| 151 |
+
|
| 152 |
+
async def _send_loop(self) -> None:
    """Pump queued outbound messages onto the WebSocket.

    Polls the send queue with a short timeout so the loop can observe
    ``_running`` flipping to False; any socket error ends the session.
    """
    while self._running:
        try:
            outgoing = await asyncio.wait_for(self._send_queue.get(), timeout=0.1)
            await self.ws.send_text(json.dumps(outgoing))
        except asyncio.TimeoutError:
            # Nothing queued yet — re-check _running and poll again.
            continue
        except Exception:
            # Socket write failed (likely disconnected) — stop everything.
            self._running = False
            break
|
| 165 |
+
|
| 166 |
+
async def _process_loop(self) -> None:
    """Drain _recv_queue and dispatch each message to the appropriate handler.

    Polls with a short timeout so the loop can observe ``_running``
    flipping to False. Handler exceptions are logged per message and
    never terminate the loop.
    """
    while self._running:
        try:
            msg = await asyncio.wait_for(
                self._recv_queue.get(), timeout=0.1
            )
        except asyncio.TimeoutError:
            continue

        # Dispatch on the message's "type" field.
        msg_type = msg.get("type")
        try:
            if msg_type == "audio":
                await self._handle_audio(msg)
            elif msg_type == "text":
                await self._handle_text(msg)
            elif msg_type == "interrupt":
                await self._handle_interrupt_msg()
            else:
                logger.warning(
                    f"[{self.session_id}] Unknown message type: {msg_type}"
                )
        except Exception as exc:
            # One bad message must not stop processing of the rest.
            logger.error(
                f"[{self.session_id}] Error processing {msg_type}: {exc}"
            )
|
| 192 |
+
|
| 193 |
+
# ------------------------------------------------------------------
|
| 194 |
+
# Message handlers
|
| 195 |
+
# ------------------------------------------------------------------
|
| 196 |
+
|
| 197 |
+
async def _handle_audio(self, msg: dict) -> None:
    """Process incoming base64 PCM audio from the user.

    Decodes and gates the audio via AudioCapture (silent chunks are
    dropped before reaching the Conductor), flips the avatar to
    LISTENING, then forwards the chunk. Conductor integration is
    duck-typed: only invoked if it exposes ``handle_audio_chunk``.
    """
    b64_data = msg.get("data", "")
    if not b64_data:
        return

    # Decode and preprocess audio
    chunk = self.audio_capture.process(b64_data)
    if chunk is None:
        return  # Below noise gate

    await self._set_state(AvatarState.LISTENING)

    # Forward to Conductor for ASR + pipeline
    try:
        # If the conductor exposes a handle_audio_chunk method, use it.
        if hasattr(self.conductor, "handle_audio_chunk"):
            result = await self.conductor.handle_audio_chunk(
                self.session_id, chunk
            )
            if result:
                await self._dispatch_conductor_result(result)
    except Exception as exc:
        # Conductor problems degrade gracefully — the session stays up.
        logger.warning(
            f"[{self.session_id}] Conductor audio handling error: {exc}"
        )
|
| 223 |
+
|
| 224 |
+
async def _handle_text(self, msg: dict) -> None:
    """Forward typed user text to the Conductor and dispatch its reply.

    Blank input is ignored; Conductor failures are logged, not raised.
    """
    text = msg.get("content", "").strip()
    if not text:
        return

    logger.info(f"[{self.session_id}] Text input: {text[:80]}")
    await self._set_state(AvatarState.THINKING)

    try:
        if hasattr(self.conductor, "handle_text_input"):
            outcome = await self.conductor.handle_text_input(
                self.session_id, text
            )
            if outcome:
                await self._dispatch_conductor_result(outcome)
    except Exception as exc:
        logger.warning(
            f"[{self.session_id}] Conductor text handling error: {exc}"
        )
|
| 244 |
+
|
| 245 |
+
async def _handle_interrupt_msg(self) -> None:
    """Handle an interrupt message from the WebSocket client.

    The client asked to cut the avatar off mid-response; delegates to
    the shared interrupt path (defined elsewhere on this class).
    """
    logger.info(f"[{self.session_id}] Client interrupt received.")
    await self.handle_interrupt()
|
| 249 |
+
|
| 250 |
+
# ------------------------------------------------------------------
|
| 251 |
+
# Conductor result dispatch
|
| 252 |
+
# ------------------------------------------------------------------
|
| 253 |
+
|
| 254 |
+
async def _dispatch_conductor_result(self, result: dict) -> None:
    """Dispatch a result dict from the Conductor to outbound messages.

    Expected keys (all optional):
        transcript: str — the avatar's spoken text
        audio: bytes or base64 str — WAV audio to send
        frames: list[np.ndarray] — video frames to encode and send
        state: str — avatar state transition

    A malformed 'state' value is logged and skipped rather than
    aborting the dispatch, so transcript/audio/frames still go out.
    """
    if "state" in result:
        # Guard the enum conversion: an unknown state string previously
        # raised ValueError and silently dropped the rest of the payload.
        try:
            new_state = AvatarState(result["state"])
        except ValueError:
            logger.warning(
                f"[{self.session_id}] Unknown avatar state: {result['state']!r}"
            )
        else:
            await self._set_state(new_state)

    if "transcript" in result:
        await self.send_transcript(result["transcript"])

    if "audio" in result:
        audio_data = result["audio"]
        # Raw bytes from the Conductor are base64-encoded for transport.
        if isinstance(audio_data, (bytes, bytearray)):
            audio_data = base64.b64encode(audio_data).decode("ascii")
        await self.send_audio(audio_data)

    if "frames" in result:
        for frame in result["frames"]:
            encoded = self.video_encoder.encode_frame(frame)
            await self.send_video_frame(encoded)
|
| 280 |
+
|
| 281 |
+
# ------------------------------------------------------------------
|
| 282 |
+
# State management
|
| 283 |
+
# ------------------------------------------------------------------
|
| 284 |
+
|
| 285 |
+
async def _set_state(self, new_state: AvatarState) -> None:
    """Transition to *new_state*, notifying the client only on change.

    Skipping no-op transitions keeps the outbound stream free of
    redundant state messages.
    """
    if new_state != self._current_state:
        self._current_state = new_state
        await self.send_state(new_state)
|
| 289 |
+
|
| 290 |
+
# ------------------------------------------------------------------
|
| 291 |
+
# Helpers
|
| 292 |
+
# ------------------------------------------------------------------
|
| 293 |
+
|
| 294 |
+
async def _enqueue_send(self, msg: dict) -> None:
    """Queue *msg* for the sender task, evicting the oldest entry on overflow.

    If the queue is still full after evicting one message, the new
    message is dropped and a warning is logged.
    """
    queue = self._send_queue
    try:
        queue.put_nowait(msg)
        return
    except asyncio.QueueFull:
        pass

    # Back-pressure policy: discard the oldest queued message, retry once.
    try:
        queue.get_nowait()
    except asyncio.QueueEmpty:
        pass
    try:
        queue.put_nowait(msg)
    except asyncio.QueueFull:
        logger.warning(
            f"[{self.session_id}] Send queue overflow, dropping."
        )
|
eden_os/genesis/__init__.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS — Genesis Engine (Agent 1: Portrait-to-4D)
|
| 3 |
+
Composes all sub-modules into a single GenesisEngine that implements
|
| 4 |
+
the IGenesisEngine interface.
|
| 5 |
+
|
| 6 |
+
Usage:
|
| 7 |
+
from eden_os.genesis import GenesisEngine
|
| 8 |
+
engine = GenesisEngine()
|
| 9 |
+
result = await engine.process_upload(image)
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
from __future__ import annotations
|
| 13 |
+
|
| 14 |
+
import numpy as np
|
| 15 |
+
from loguru import logger
|
| 16 |
+
|
| 17 |
+
from eden_os.shared.interfaces import IGenesisEngine
|
| 18 |
+
from eden_os.shared.types import EdenValidationResult
|
| 19 |
+
|
| 20 |
+
from eden_os.genesis.portrait_engine import PortraitEngine
|
| 21 |
+
from eden_os.genesis.eden_protocol_validator import EdenProtocolValidator
|
| 22 |
+
from eden_os.genesis.latent_encoder import LatentEncoder
|
| 23 |
+
from eden_os.genesis.preload_cache import PreloadCache
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class GenesisEngine(IGenesisEngine):
    """Agent 1 — Portrait-to-4D Engine.

    A thin facade composing the four Genesis sub-modules:
      * PortraitEngine        – face detection, alignment, lighting normalisation
      * EdenProtocolValidator – 0.3 deviation skin texture fidelity check
      * LatentEncoder         – portrait → animation-ready latent vector
      * PreloadCache          – pre-computed idle animation seed data
    """

    def __init__(self, latent_dim: int = 512, num_idle_seeds: int = 8) -> None:
        self._portrait = PortraitEngine()
        self._validator = EdenProtocolValidator()
        self._encoder = LatentEncoder(latent_dim=latent_dim)
        self._cache = PreloadCache(num_seeds=num_idle_seeds)
        logger.info("GenesisEngine initialised (latent_dim={}, idle_seeds={})",
                    latent_dim, num_idle_seeds)

    # -- IGenesisEngine interface ------------------------------------------

    async def process_upload(self, image: np.ndarray) -> dict:
        """Run face detection, alignment and enhancement on an upload.

        *image* is an RGB uint8 array of shape (H, W, 3). Returns a dict
        with keys: aligned_face, landmarks, bbox, original_image.
        """
        return await self._portrait.process(image)

    async def validate_eden_protocol(
        self,
        generated: np.ndarray,
        reference: np.ndarray,
        threshold: float = 0.3,
    ) -> EdenValidationResult:
        """Check skin-texture fidelity of *generated* against *reference*.

        *threshold* is the maximum allowed deviation (default 0.3).
        Returns an EdenValidationResult with .passed, .score, .feedback.
        """
        return await self._validator.validate(generated, reference, threshold)

    async def encode_to_latent(self, portrait: np.ndarray) -> np.ndarray:
        """Encode an RGB uint8 portrait into a (latent_dim,) float32
        vector consumable by the animation engine."""
        return await self._encoder.encode(portrait)

    async def precompute_idle_cache(self, profile: dict) -> dict:
        """Pre-compute idle animation seed frames and breathing cycle.

        *profile* must contain 'aligned_face' (np.ndarray 512x512x3).
        Returns a dict with 'seed_frames' and 'breathing_cycle' lists.
        """
        return await self._cache.compute(profile)

    # -- Lifecycle -----------------------------------------------------------

    def close(self) -> None:
        """Release resources held by sub-engines."""
        self._portrait.close()
        logger.info("GenesisEngine closed")
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
__all__ = ["GenesisEngine"]
|
eden_os/genesis/eden_protocol_validator.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS — Genesis Engine: Eden Protocol Validator
|
| 3 |
+
Implements the 0.3 deviation rule for skin texture fidelity.
|
| 4 |
+
|
| 5 |
+
Process:
|
| 6 |
+
1. Extract face region from both generated and reference images.
|
| 7 |
+
2. Convert to LAB colour space.
|
| 8 |
+
3. Apply Gabor filter bank (4 orientations x 3 frequencies) to the
|
| 9 |
+
L-channel to capture micro-texture features (pores, fine lines).
|
| 10 |
+
4. Compute standard deviation between the two feature vectors.
|
| 11 |
+
5. Return EdenValidationResult with passed / score / feedback.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from __future__ import annotations
|
| 15 |
+
|
| 16 |
+
import numpy as np
|
| 17 |
+
import cv2
|
| 18 |
+
from loguru import logger
|
| 19 |
+
|
| 20 |
+
from eden_os.shared.types import EdenValidationResult
|
| 21 |
+
|
| 22 |
+
try:
|
| 23 |
+
from skimage.filters import gabor_kernel
|
| 24 |
+
_HAS_SKIMAGE = True
|
| 25 |
+
except ImportError:
|
| 26 |
+
_HAS_SKIMAGE = False
|
| 27 |
+
logger.warning("scikit-image not installed — Gabor filter bank unavailable, "
|
| 28 |
+
"EdenProtocolValidator will use histogram fallback")
|
| 29 |
+
|
| 30 |
+
from scipy.ndimage import convolve
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class EdenProtocolValidator:
|
| 34 |
+
"""Validates that a generated image preserves the reference portrait's
|
| 35 |
+
skin texture within the Eden Protocol's 0.3 standard-deviation threshold.
|
| 36 |
+
"""
|
| 37 |
+
|
| 38 |
+
# Gabor filter bank parameters
|
| 39 |
+
ORIENTATIONS = [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4] # 4 orientations
|
| 40 |
+
FREQUENCIES = [0.1, 0.25, 0.4] # 3 frequencies
|
| 41 |
+
|
| 42 |
+
def __init__(self) -> None:
|
| 43 |
+
self._kernels: list[np.ndarray] = []
|
| 44 |
+
self._build_filter_bank()
|
| 45 |
+
|
| 46 |
+
# ------------------------------------------------------------------
|
| 47 |
+
# Filter bank construction
|
| 48 |
+
# ------------------------------------------------------------------
|
| 49 |
+
def _build_filter_bank(self) -> None:
|
| 50 |
+
"""Pre-compute the Gabor filter kernels."""
|
| 51 |
+
if not _HAS_SKIMAGE:
|
| 52 |
+
return
|
| 53 |
+
|
| 54 |
+
for theta in self.ORIENTATIONS:
|
| 55 |
+
for freq in self.FREQUENCIES:
|
| 56 |
+
kernel = gabor_kernel(frequency=freq, theta=theta,
|
| 57 |
+
sigma_x=3.0, sigma_y=3.0)
|
| 58 |
+
# Store the real part as a float64 2-D kernel
|
| 59 |
+
self._kernels.append(np.real(kernel).astype(np.float64))
|
| 60 |
+
|
| 61 |
+
logger.debug("EdenProtocolValidator: built {} Gabor kernels",
|
| 62 |
+
len(self._kernels))
|
| 63 |
+
|
| 64 |
+
# ------------------------------------------------------------------
|
| 65 |
+
# Public API
|
| 66 |
+
# ------------------------------------------------------------------
|
| 67 |
+
async def validate(
|
| 68 |
+
self,
|
| 69 |
+
generated: np.ndarray,
|
| 70 |
+
reference: np.ndarray,
|
| 71 |
+
threshold: float = 0.3,
|
| 72 |
+
) -> EdenValidationResult:
|
| 73 |
+
"""Compare *generated* against *reference* and return validation result.
|
| 74 |
+
|
| 75 |
+
Parameters
|
| 76 |
+
----------
|
| 77 |
+
generated : np.ndarray – RGB uint8 image of the generated portrait.
|
| 78 |
+
reference : np.ndarray – RGB uint8 image of the reference portrait.
|
| 79 |
+
threshold : float – maximum allowed std-dev distance (default 0.3).
|
| 80 |
+
|
| 81 |
+
Returns
|
| 82 |
+
-------
|
| 83 |
+
EdenValidationResult with .passed, .score, .feedback
|
| 84 |
+
"""
|
| 85 |
+
logger.info("EdenProtocolValidator.validate — threshold={}", threshold)
|
| 86 |
+
|
| 87 |
+
# Resize both images to the same dimensions for fair comparison
|
| 88 |
+
target_size = (256, 256)
|
| 89 |
+
gen_resized = cv2.resize(generated, target_size, interpolation=cv2.INTER_AREA)
|
| 90 |
+
ref_resized = cv2.resize(reference, target_size, interpolation=cv2.INTER_AREA)
|
| 91 |
+
|
| 92 |
+
# Extract the L-channel from LAB
|
| 93 |
+
gen_l = self._extract_lightness(gen_resized)
|
| 94 |
+
ref_l = self._extract_lightness(ref_resized)
|
| 95 |
+
|
| 96 |
+
# Compute feature vectors
|
| 97 |
+
gen_features = self._compute_texture_features(gen_l)
|
| 98 |
+
ref_features = self._compute_texture_features(ref_l)
|
| 99 |
+
|
| 100 |
+
# Compute deviation
|
| 101 |
+
score = self._compute_deviation(gen_features, ref_features)
|
| 102 |
+
|
| 103 |
+
passed = score <= threshold
|
| 104 |
+
if passed:
|
| 105 |
+
feedback = (f"Eden Protocol PASSED — skin texture deviation {score:.4f} "
|
| 106 |
+
f"is within the {threshold} threshold.")
|
| 107 |
+
else:
|
| 108 |
+
feedback = (f"Eden Protocol FAILED — skin texture deviation {score:.4f} "
|
| 109 |
+
f"exceeds the {threshold} threshold. "
|
| 110 |
+
"Consider regenerating with tighter identity lock or "
|
| 111 |
+
"reducing style transfer intensity.")
|
| 112 |
+
|
| 113 |
+
logger.info("EdenProtocolValidator result: passed={} score={:.4f}", passed, score)
|
| 114 |
+
return EdenValidationResult(passed=passed, score=float(score), feedback=feedback)
|
| 115 |
+
|
| 116 |
+
# ------------------------------------------------------------------
|
| 117 |
+
# Internal helpers
|
| 118 |
+
# ------------------------------------------------------------------
|
| 119 |
+
@staticmethod
|
| 120 |
+
def _extract_lightness(image_rgb: np.ndarray) -> np.ndarray:
|
| 121 |
+
"""Convert RGB → LAB and return the L channel as float64 in [0, 1]."""
|
| 122 |
+
lab = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2LAB)
|
| 123 |
+
l_channel = lab[:, :, 0].astype(np.float64) / 255.0
|
| 124 |
+
return l_channel
|
| 125 |
+
|
| 126 |
+
def _compute_texture_features(self, lightness: np.ndarray) -> np.ndarray:
|
| 127 |
+
"""Apply Gabor filter bank and collect mean + std of each response
|
| 128 |
+
into a single feature vector.
|
| 129 |
+
|
| 130 |
+
Returns a 1-D numpy array of length 2 * num_kernels.
|
| 131 |
+
"""
|
| 132 |
+
if not self._kernels:
|
| 133 |
+
# Fallback: use simple gradient-based texture descriptor
|
| 134 |
+
return self._gradient_fallback(lightness)
|
| 135 |
+
|
| 136 |
+
features: list[float] = []
|
| 137 |
+
for kernel in self._kernels:
|
| 138 |
+
# Convolve lightness image with the Gabor kernel
|
| 139 |
+
response = convolve(lightness, kernel, mode="reflect")
|
| 140 |
+
features.append(float(np.mean(np.abs(response))))
|
| 141 |
+
features.append(float(np.std(response)))
|
| 142 |
+
|
| 143 |
+
return np.array(features, dtype=np.float64)
|
| 144 |
+
|
| 145 |
+
@staticmethod
|
| 146 |
+
def _gradient_fallback(lightness: np.ndarray) -> np.ndarray:
|
| 147 |
+
"""Fallback texture descriptor using Sobel gradients when skimage
|
| 148 |
+
is unavailable."""
|
| 149 |
+
gx = cv2.Sobel(lightness, cv2.CV_64F, 1, 0, ksize=3)
|
| 150 |
+
gy = cv2.Sobel(lightness, cv2.CV_64F, 0, 1, ksize=3)
|
| 151 |
+
mag = np.sqrt(gx ** 2 + gy ** 2)
|
| 152 |
+
# Compute statistics in 4x4 spatial grid
|
| 153 |
+
features: list[float] = []
|
| 154 |
+
h, w = lightness.shape
|
| 155 |
+
gh, gw = h // 4, w // 4
|
| 156 |
+
for gy_idx in range(4):
|
| 157 |
+
for gx_idx in range(4):
|
| 158 |
+
patch = mag[gy_idx * gh:(gy_idx + 1) * gh,
|
| 159 |
+
gx_idx * gw:(gx_idx + 1) * gw]
|
| 160 |
+
features.append(float(np.mean(patch)))
|
| 161 |
+
features.append(float(np.std(patch)))
|
| 162 |
+
return np.array(features, dtype=np.float64)
|
| 163 |
+
|
| 164 |
+
@staticmethod
|
| 165 |
+
def _compute_deviation(feat_a: np.ndarray, feat_b: np.ndarray) -> float:
|
| 166 |
+
"""Compute the normalised standard-deviation distance between two
|
| 167 |
+
feature vectors.
|
| 168 |
+
|
| 169 |
+
We use the root-mean-square of element-wise differences, normalised
|
| 170 |
+
by the mean magnitude of the reference vector so the score is
|
| 171 |
+
scale-invariant.
|
| 172 |
+
"""
|
| 173 |
+
diff = feat_a - feat_b
|
| 174 |
+
rms = float(np.sqrt(np.mean(diff ** 2)))
|
| 175 |
+
ref_mag = float(np.mean(np.abs(feat_b))) + 1e-8 # avoid division by zero
|
| 176 |
+
deviation = rms / ref_mag
|
| 177 |
+
return deviation
|
eden_os/genesis/latent_encoder.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS — Genesis Engine: Latent Encoder
|
| 3 |
+
Encodes a processed portrait into a compact latent representation
|
| 4 |
+
compatible with the animation engine (LivePortrait appearance extractor).
|
| 5 |
+
|
| 6 |
+
Pipeline:
|
| 7 |
+
1. Resize to 256x256.
|
| 8 |
+
2. Normalise pixel values to [-1, 1].
|
| 9 |
+
3. Extract multi-scale feature maps via spatial pyramid pooling.
|
| 10 |
+
4. Average-pool feature maps to produce a 1-D latent vector.
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
from __future__ import annotations
|
| 14 |
+
|
| 15 |
+
import numpy as np
|
| 16 |
+
import cv2
|
| 17 |
+
from loguru import logger
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class LatentEncoder:
    """Encode a portrait image into a latent vector for the animation engine."""

    ENCODE_SIZE = 256
    LATENT_DIM = 512  # output vector dimensionality

    def __init__(self, latent_dim: int = 512) -> None:
        self.LATENT_DIM = latent_dim
        # Projection matrices are deterministic (seed 42) but expensive to
        # build; cache them per input feature dimension instead of
        # regenerating a (feat_dim x latent_dim) Gaussian matrix on every
        # encode() call.
        self._proj_cache: dict[int, np.ndarray] = {}

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    async def encode(self, portrait: np.ndarray) -> np.ndarray:
        """Encode *portrait* (RGB uint8) into a 1-D latent vector.

        Parameters
        ----------
        portrait : np.ndarray
            RGB image, any size, dtype uint8.

        Returns
        -------
        np.ndarray of shape (LATENT_DIM,) and dtype float32.
        """
        logger.info("LatentEncoder.encode — input shape {}", portrait.shape)

        # Step 1: resize
        resized = cv2.resize(portrait, (self.ENCODE_SIZE, self.ENCODE_SIZE),
                             interpolation=cv2.INTER_LANCZOS4)

        # Step 2: normalise to [-1, 1] float32
        normalised = resized.astype(np.float32) / 127.5 - 1.0  # (256, 256, 3)

        # Step 3: extract multi-scale feature maps via spatial pyramid
        features = self._spatial_pyramid_features(normalised)

        # Step 4: project to final latent dimension
        latent = self._project_to_latent(features)

        logger.info("LatentEncoder.encode — output shape {}", latent.shape)
        return latent

    # ------------------------------------------------------------------
    # Feature extraction
    # ------------------------------------------------------------------
    def _spatial_pyramid_features(self, image: np.ndarray) -> np.ndarray:
        """Compute multi-scale pooled features using a spatial pyramid.

        Levels: 1x1, 2x2, 4x4, 8x8 spatial grids.
        At each level, compute (mean, std) per channel in each cell.
        Total features = sum_over_levels(grid_cells * channels * 2),
        plus an 8-bin gradient-orientation histogram appended at the end.
        """
        h, w, c = image.shape
        features: list[float] = []

        for grid_size in [1, 2, 4, 8]:
            cell_h = h // grid_size
            cell_w = w // grid_size
            for gy in range(grid_size):
                for gx in range(grid_size):
                    cell = image[gy * cell_h:(gy + 1) * cell_h,
                                 gx * cell_w:(gx + 1) * cell_w, :]
                    for ch in range(c):
                        features.append(float(np.mean(cell[:, :, ch])))
                        features.append(float(np.std(cell[:, :, ch])))

        # Also add gradient-based features for edge/texture awareness
        gray = np.mean(image, axis=2)
        gx = np.gradient(gray, axis=1)
        gy = np.gradient(gray, axis=0)
        mag = np.sqrt(gx ** 2 + gy ** 2)
        angle = np.arctan2(gy, gx)

        # Gradient histogram (8 bins of orientation, mean magnitude per bin)
        bin_edges = np.linspace(-np.pi, np.pi, 9)
        for i in range(8):
            mask = (angle >= bin_edges[i]) & (angle < bin_edges[i + 1])
            if np.any(mask):
                features.append(float(np.mean(mag[mask])))
            else:
                features.append(0.0)

        return np.array(features, dtype=np.float32)

    def _project_to_latent(self, features: np.ndarray) -> np.ndarray:
        """Deterministically project the feature vector to LATENT_DIM via
        a fixed random projection (seeded for reproducibility).

        This is a lightweight stand-in for a learned encoder; it preserves
        the structure of the feature space while producing a vector of the
        correct dimensionality for downstream consumption.
        """
        feat_dim = features.shape[0]

        if feat_dim == self.LATENT_DIM:
            return features

        proj = self._proj_cache.get(feat_dim)
        if proj is None:
            # Fixed random projection matrix (seeded so identical images →
            # identical latents); built once per feature dimension rather
            # than on every call.
            rng = np.random.RandomState(42)
            proj = rng.randn(feat_dim, self.LATENT_DIM).astype(np.float32)
            # Normalise columns for unit variance
            proj /= np.sqrt(np.sum(proj ** 2, axis=0, keepdims=True)) + 1e-8
            self._proj_cache[feat_dim] = proj

        latent = features @ proj

        # L2-normalise the latent vector
        norm = np.linalg.norm(latent) + 1e-8
        latent = latent / norm

        return latent
|
eden_os/genesis/portrait_engine.py
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS — Genesis Engine: Portrait Processing Pipeline
|
| 3 |
+
Accepts uploaded image (numpy RGB), detects face via MediaPipe,
|
| 4 |
+
extracts landmarks, crops/aligns to 512x512, normalizes lighting.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
import cv2
|
| 9 |
+
from loguru import logger
|
| 10 |
+
|
| 11 |
+
try:
|
| 12 |
+
import mediapipe as mp
|
| 13 |
+
except ImportError:
|
| 14 |
+
mp = None
|
| 15 |
+
logger.warning("mediapipe not installed — PortraitEngine will use fallback face detection")
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class PortraitEngine:
|
| 19 |
+
"""Face detection, alignment, and enhancement pipeline."""
|
| 20 |
+
|
| 21 |
+
PORTRAIT_SIZE = 512
|
| 22 |
+
|
| 23 |
+
def __init__(self) -> None:
|
| 24 |
+
self._detector = None
|
| 25 |
+
self._initialized = False
|
| 26 |
+
|
| 27 |
+
# ------------------------------------------------------------------
|
| 28 |
+
# Lazy init — avoids loading MediaPipe until first use
|
| 29 |
+
# ------------------------------------------------------------------
|
| 30 |
+
def _ensure_initialized(self) -> None:
|
| 31 |
+
if self._initialized:
|
| 32 |
+
return
|
| 33 |
+
if mp is not None:
|
| 34 |
+
self._mp_face_detection = mp.solutions.face_detection
|
| 35 |
+
self._detector = self._mp_face_detection.FaceDetection(
|
| 36 |
+
model_selection=1, # full-range model (better for varied distances)
|
| 37 |
+
min_detection_confidence=0.5,
|
| 38 |
+
)
|
| 39 |
+
self._initialized = True
|
| 40 |
+
|
| 41 |
+
# ------------------------------------------------------------------
|
| 42 |
+
# Public API
|
| 43 |
+
# ------------------------------------------------------------------
|
| 44 |
+
async def process(self, image: np.ndarray) -> dict:
|
| 45 |
+
"""Process an uploaded RGB image into a standardised portrait dict.
|
| 46 |
+
|
| 47 |
+
Parameters
|
| 48 |
+
----------
|
| 49 |
+
image : np.ndarray
|
| 50 |
+
RGB image with shape (H, W, 3) and dtype uint8.
|
| 51 |
+
|
| 52 |
+
Returns
|
| 53 |
+
-------
|
| 54 |
+
dict with keys:
|
| 55 |
+
aligned_face – np.ndarray (512, 512, 3) uint8 RGB
|
| 56 |
+
landmarks – dict of landmark name → (x, y) normalised coords
|
| 57 |
+
bbox – (x, y, w, h) in pixel coords of the original image
|
| 58 |
+
original_image – the input image (unchanged)
|
| 59 |
+
"""
|
| 60 |
+
self._ensure_initialized()
|
| 61 |
+
logger.info("PortraitEngine.process — starting face detection")
|
| 62 |
+
|
| 63 |
+
if image is None or image.ndim != 3 or image.shape[2] != 3:
|
| 64 |
+
raise ValueError("Input must be an RGB image with shape (H, W, 3)")
|
| 65 |
+
|
| 66 |
+
h, w, _ = image.shape
|
| 67 |
+
|
| 68 |
+
# ----- face detection -----
|
| 69 |
+
bbox, landmarks = self._detect_face(image)
|
| 70 |
+
|
| 71 |
+
if bbox is None:
|
| 72 |
+
logger.warning("No face detected — using centre crop fallback")
|
| 73 |
+
bbox, landmarks = self._centre_crop_fallback(h, w)
|
| 74 |
+
|
| 75 |
+
# ----- crop face region with margin -----
|
| 76 |
+
cropped = self._crop_with_margin(image, bbox, margin_factor=0.4)
|
| 77 |
+
|
| 78 |
+
# ----- align face (rotate to upright using eye landmarks) -----
|
| 79 |
+
aligned = self._align_face(cropped, landmarks, bbox)
|
| 80 |
+
|
| 81 |
+
# ----- resize to standard portrait size -----
|
| 82 |
+
aligned = cv2.resize(aligned, (self.PORTRAIT_SIZE, self.PORTRAIT_SIZE),
|
| 83 |
+
interpolation=cv2.INTER_LANCZOS4)
|
| 84 |
+
|
| 85 |
+
# ----- normalise lighting (histogram eq on LAB L-channel) -----
|
| 86 |
+
aligned = self._normalise_lighting(aligned)
|
| 87 |
+
|
| 88 |
+
logger.info("PortraitEngine.process — done bbox={}", bbox)
|
| 89 |
+
return {
|
| 90 |
+
"aligned_face": aligned,
|
| 91 |
+
"landmarks": landmarks,
|
| 92 |
+
"bbox": bbox,
|
| 93 |
+
"original_image": image,
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
# ------------------------------------------------------------------
|
| 97 |
+
# Internal helpers
|
| 98 |
+
# ------------------------------------------------------------------
|
| 99 |
+
def _detect_face(self, image: np.ndarray):
|
| 100 |
+
"""Return (bbox, landmarks) using MediaPipe or fallback."""
|
| 101 |
+
h, w, _ = image.shape
|
| 102 |
+
|
| 103 |
+
if self._detector is None:
|
| 104 |
+
# Fallback: simple Haar cascade via OpenCV
|
| 105 |
+
return self._haar_fallback(image)
|
| 106 |
+
|
| 107 |
+
results = self._detector.process(image)
|
| 108 |
+
if not results.detections:
|
| 109 |
+
return None, {}
|
| 110 |
+
|
| 111 |
+
det = results.detections[0] # pick highest-confidence face
|
| 112 |
+
bb = det.location_data.relative_bounding_box
|
| 113 |
+
x = int(bb.xmin * w)
|
| 114 |
+
y = int(bb.ymin * h)
|
| 115 |
+
bw = int(bb.width * w)
|
| 116 |
+
bh = int(bb.height * h)
|
| 117 |
+
bbox = (max(x, 0), max(y, 0), bw, bh)
|
| 118 |
+
|
| 119 |
+
# Extract the 6 MediaPipe face-detection keypoints
|
| 120 |
+
keypoint_names = [
|
| 121 |
+
"right_eye", "left_eye", "nose_tip",
|
| 122 |
+
"mouth_center", "right_ear_tragion", "left_ear_tragion",
|
| 123 |
+
]
|
| 124 |
+
landmarks = {}
|
| 125 |
+
for i, kp in enumerate(det.location_data.relative_keypoints):
|
| 126 |
+
name = keypoint_names[i] if i < len(keypoint_names) else f"kp_{i}"
|
| 127 |
+
landmarks[name] = (float(kp.x), float(kp.y))
|
| 128 |
+
|
| 129 |
+
return bbox, landmarks
|
| 130 |
+
|
| 131 |
+
def _haar_fallback(self, image: np.ndarray):
|
| 132 |
+
"""Fallback face detection using OpenCV Haar cascade."""
|
| 133 |
+
h, w, _ = image.shape
|
| 134 |
+
gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
|
| 135 |
+
cascade_path = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
|
| 136 |
+
cascade = cv2.CascadeClassifier(cascade_path)
|
| 137 |
+
faces = cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5,
|
| 138 |
+
minSize=(60, 60))
|
| 139 |
+
if len(faces) == 0:
|
| 140 |
+
return None, {}
|
| 141 |
+
|
| 142 |
+
# pick largest face
|
| 143 |
+
areas = [fw * fh for (_, _, fw, fh) in faces]
|
| 144 |
+
idx = int(np.argmax(areas))
|
| 145 |
+
x, y, fw, fh = faces[idx]
|
| 146 |
+
bbox = (int(x), int(y), int(fw), int(fh))
|
| 147 |
+
|
| 148 |
+
# approximate landmarks from bbox
|
| 149 |
+
cx, cy = x + fw / 2, y + fh / 2
|
| 150 |
+
landmarks = {
|
| 151 |
+
"right_eye": ((cx - fw * 0.15) / w, (cy - fh * 0.15) / h),
|
| 152 |
+
"left_eye": ((cx + fw * 0.15) / w, (cy - fh * 0.15) / h),
|
| 153 |
+
"nose_tip": (cx / w, cy / h),
|
| 154 |
+
"mouth_center": (cx / w, (cy + fh * 0.2) / h),
|
| 155 |
+
}
|
| 156 |
+
return bbox, landmarks
|
| 157 |
+
|
| 158 |
+
def _centre_crop_fallback(self, h: int, w: int):
|
| 159 |
+
"""When no face is found, assume the face is centered."""
|
| 160 |
+
side = min(h, w)
|
| 161 |
+
x = (w - side) // 2
|
| 162 |
+
y = (h - side) // 2
|
| 163 |
+
bbox = (x, y, side, side)
|
| 164 |
+
cx, cy = 0.5, 0.5
|
| 165 |
+
landmarks = {
|
| 166 |
+
"right_eye": (cx - 0.05, cy - 0.05),
|
| 167 |
+
"left_eye": (cx + 0.05, cy - 0.05),
|
| 168 |
+
"nose_tip": (cx, cy),
|
| 169 |
+
"mouth_center": (cx, cy + 0.08),
|
| 170 |
+
}
|
| 171 |
+
return bbox, landmarks
|
| 172 |
+
|
| 173 |
+
def _crop_with_margin(self, image: np.ndarray, bbox: tuple,
|
| 174 |
+
margin_factor: float = 0.4) -> np.ndarray:
|
| 175 |
+
"""Crop face region with extra margin around the bounding box."""
|
| 176 |
+
h, w, _ = image.shape
|
| 177 |
+
x, y, bw, bh = bbox
|
| 178 |
+
mx = int(bw * margin_factor)
|
| 179 |
+
my = int(bh * margin_factor)
|
| 180 |
+
x1 = max(x - mx, 0)
|
| 181 |
+
y1 = max(y - my, 0)
|
| 182 |
+
x2 = min(x + bw + mx, w)
|
| 183 |
+
y2 = min(y + bh + my, h)
|
| 184 |
+
cropped = image[y1:y2, x1:x2]
|
| 185 |
+
if cropped.size == 0:
|
| 186 |
+
return image # safety fallback
|
| 187 |
+
return cropped
|
| 188 |
+
|
| 189 |
+
def _align_face(self, cropped: np.ndarray, landmarks: dict,
|
| 190 |
+
bbox: tuple) -> np.ndarray:
|
| 191 |
+
"""Rotate image so that the line between the eyes is horizontal."""
|
| 192 |
+
if "left_eye" not in landmarks or "right_eye" not in landmarks:
|
| 193 |
+
return cropped
|
| 194 |
+
|
| 195 |
+
h, w, _ = cropped.shape
|
| 196 |
+
# landmarks are in normalised coords relative to the original image;
|
| 197 |
+
# convert to pixel coords within the cropped region is impractical
|
| 198 |
+
# without the crop offset, so we use a simpler heuristic: compute
|
| 199 |
+
# angle from the normalised coords (they preserve relative positions).
|
| 200 |
+
lx, ly = landmarks["left_eye"]
|
| 201 |
+
rx, ry = landmarks["right_eye"]
|
| 202 |
+
angle_rad = np.arctan2(ly - ry, lx - rx)
|
| 203 |
+
angle_deg = float(np.degrees(angle_rad))
|
| 204 |
+
|
| 205 |
+
# only rotate if tilt is significant but not extreme
|
| 206 |
+
if abs(angle_deg) < 0.5 or abs(angle_deg) > 45:
|
| 207 |
+
return cropped
|
| 208 |
+
|
| 209 |
+
center = (w // 2, h // 2)
|
| 210 |
+
rot_mat = cv2.getRotationMatrix2D(center, angle_deg, 1.0)
|
| 211 |
+
aligned = cv2.warpAffine(cropped, rot_mat, (w, h),
|
| 212 |
+
flags=cv2.INTER_LANCZOS4,
|
| 213 |
+
borderMode=cv2.BORDER_REFLECT_101)
|
| 214 |
+
return aligned
|
| 215 |
+
|
| 216 |
+
@staticmethod
def _normalise_lighting(image_rgb: np.ndarray) -> np.ndarray:
    """Equalise lighting via CLAHE on the L channel in LAB space.

    Only the lightness channel is adjusted; the colour channels (a, b)
    are left untouched, then the result is converted back to RGB.
    """
    lab = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2LAB)
    lightness, chroma_a, chroma_b = cv2.split(lab)
    equaliser = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    balanced = cv2.merge([equaliser.apply(lightness), chroma_a, chroma_b])
    return cv2.cvtColor(balanced, cv2.COLOR_LAB2RGB)
|
| 225 |
+
|
| 226 |
+
# ------------------------------------------------------------------
# Cleanup
# ------------------------------------------------------------------
def close(self) -> None:
    """Release the detector backend and mark the engine uninitialised."""
    detector = self._detector
    if detector is not None:
        detector.close()
        self._detector = None
    self._initialized = False
|
eden_os/genesis/preload_cache.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS — Genesis Engine: Preload Cache
|
| 3 |
+
Pre-computes idle animation seed data so the avatar is alive on load
|
| 4 |
+
with ZERO wait time.
|
| 5 |
+
|
| 6 |
+
Given a processed portrait, generates:
|
| 7 |
+
- N seed frames with slight random perturbations:
|
| 8 |
+
* micro blink positions (eyelid offset values)
|
| 9 |
+
* slight head rotations (affine transforms)
|
| 10 |
+
- Breathing cycle keyframes (6 frames of subtle vertical shift).
|
| 11 |
+
|
| 12 |
+
Returns a cache dict with 'seed_frames' and 'breathing_cycle' lists.
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
from __future__ import annotations
|
| 16 |
+
|
| 17 |
+
import numpy as np
|
| 18 |
+
import cv2
|
| 19 |
+
from loguru import logger
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class PreloadCache:
    """Pre-compute idle animation seed data for instant-ready avatars.

    Produces small variations of a processed portrait (micro blinks,
    slight head rotations) plus a short breathing cycle so idle motion
    can play immediately on load.
    """

    # Default number of perturbed seed frames to generate.
    DEFAULT_NUM_SEEDS = 8
    # Number of keyframes in one breathing cycle.
    BREATHING_FRAMES = 6

    def __init__(self, num_seeds: int = DEFAULT_NUM_SEEDS) -> None:
        """
        Args:
            num_seeds: number of perturbed idle seed frames to
                pre-compute. Defaults to ``DEFAULT_NUM_SEEDS``.
        """
        self.num_seeds = num_seeds

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    async def compute(self, profile: dict) -> dict:
        """Pre-compute idle seed frames and breathing keyframes.

        Parameters
        ----------
        profile : dict
            Must contain 'aligned_face' (np.ndarray, 512x512x3 RGB uint8)
            as produced by PortraitEngine.process().
            Optional keys: 'landmarks' (dict), 'bbox' (tuple).

        Returns
        -------
        dict with keys:
            seed_frames – list[dict] each containing 'frame' (np.ndarray),
                          'blink_offset' (float), 'head_rotation_deg' (float)
            breathing_cycle – list[dict] each containing 'frame' (np.ndarray),
                              'vertical_shift_px' (float), 'phase' (float 0..1)

        Raises
        ------
        ValueError
            If 'aligned_face' is missing from the profile.
        """
        face = profile.get("aligned_face")
        if face is None:
            raise ValueError("profile must contain 'aligned_face' key")

        logger.info("PreloadCache.compute — generating {} seed frames + {} breathing frames",
                    self.num_seeds, self.BREATHING_FRAMES)

        # Fixed seed: the cache must be deterministic across restarts.
        rng = np.random.RandomState(seed=7)

        seed_frames = self._generate_seed_frames(face, rng)
        breathing_cycle = self._generate_breathing_cycle(face)

        cache = {
            "seed_frames": seed_frames,
            "breathing_cycle": breathing_cycle,
        }
        logger.info("PreloadCache.compute — done ({} seed, {} breathing)",
                    len(seed_frames), len(breathing_cycle))
        return cache

    # ------------------------------------------------------------------
    # Seed frames (blink + micro head rotation)
    # ------------------------------------------------------------------
    def _generate_seed_frames(self, face: np.ndarray,
                              rng: np.random.RandomState) -> list[dict]:
        """Generate ``num_seeds`` frames with subtle random perturbations.

        Each frame combines a micro head rotation (±2°) with an optional
        simulated blink; both parameters are recorded alongside the frame.
        """
        h, w = face.shape[:2]
        center = (w / 2, h / 2)
        seed_frames: list[dict] = []

        for i in range(self.num_seeds):
            # Random micro head rotation: +/- 2 degrees
            rotation_deg = float(rng.uniform(-2.0, 2.0))
            # Random blink offset: 0 = eyes open, 1 = fully closed.
            # The weighted choice keeps most frames open; a few are
            # partially or mostly closed.
            blink_offset = float(rng.choice(
                [0.0, 0.0, 0.0, 0.0, 0.05, 0.1, 0.3, 0.8],
            ))

            # Apply the head rotation via an affine transform.
            rot_mat = cv2.getRotationMatrix2D(center, rotation_deg, 1.0)
            frame = cv2.warpAffine(face, rot_mat, (w, h),
                                   flags=cv2.INTER_LINEAR,
                                   borderMode=cv2.BORDER_REFLECT_101)

            # Simulate the blink by blending the eye band towards skin tone.
            if blink_offset > 0.01:
                frame = self._apply_blink_overlay(frame, blink_offset)

            seed_frames.append({
                "frame": frame,
                "blink_offset": blink_offset,
                "head_rotation_deg": rotation_deg,
                "index": i,
            })

        return seed_frames

    # ------------------------------------------------------------------
    # Breathing cycle
    # ------------------------------------------------------------------
    def _generate_breathing_cycle(self, face: np.ndarray) -> list[dict]:
        """Generate ``BREATHING_FRAMES`` keyframes of subtle breathing motion.

        Breathing is modelled as a vertical sinusoidal shift of up to
        ±2.5 px — just enough to feel organic without being distracting.
        """
        h, w = face.shape[:2]
        max_shift_px = 2.5  # peak of inhale
        frames: list[dict] = []

        for i in range(self.BREATHING_FRAMES):
            phase = i / self.BREATHING_FRAMES  # 0.0 → ~1.0
            # Sinusoidal vertical shift (inhale = up, exhale = down).
            shift_px = max_shift_px * np.sin(2 * np.pi * phase)

            # Apply the vertical translation (negative y moves content up).
            trans_mat = np.float32([[1, 0, 0], [0, 1, -shift_px]])
            frame = cv2.warpAffine(face, trans_mat, (w, h),
                                   flags=cv2.INTER_LINEAR,
                                   borderMode=cv2.BORDER_REFLECT_101)

            frames.append({
                "frame": frame,
                "vertical_shift_px": float(shift_px),
                "phase": float(phase),
                "index": i,
            })

        return frames

    # ------------------------------------------------------------------
    # Blink overlay
    # ------------------------------------------------------------------
    @staticmethod
    def _apply_blink_overlay(frame: np.ndarray, blink_offset: float) -> np.ndarray:
        """Simulate eyelid closure by blending the eye region towards
        a skin-tone average. blink_offset in [0, 1]."""
        result = frame.copy()
        h, w = frame.shape[:2]

        # Eye region: roughly the upper-middle band of the portrait.
        # Fractions assume a roughly centred, aligned face — TODO confirm
        # against PortraitEngine output framing.
        eye_y_start = int(h * 0.28)
        eye_y_end = int(h * 0.42)
        eye_x_start = int(w * 0.2)
        eye_x_end = int(w * 0.8)

        eye_region = result[eye_y_start:eye_y_end, eye_x_start:eye_x_end]
        if eye_region.size == 0:
            return result

        # Mean skin colour around the eyes stands in for the closed lid.
        skin_color = np.mean(eye_region, axis=(0, 1)).astype(np.uint8)
        closed_lid = np.full_like(eye_region, skin_color)

        # Blend: higher blink_offset → more closed.
        alpha = np.clip(blink_offset, 0.0, 1.0)
        blended = cv2.addWeighted(eye_region, 1.0 - alpha, closed_lid, alpha, 0)
        result[eye_y_start:eye_y_end, eye_x_start:eye_x_end] = blended

        return result
|
eden_os/scholar/__init__.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS — Scholar Engine (Agent 7)
|
| 3 |
+
Knowledge Engine + Media Ingestion + RAG Retrieval.
|
| 4 |
+
|
| 5 |
+
Composes all sub-modules:
|
| 6 |
+
- YouTubeIngestor: YouTube video transcription pipeline
|
| 7 |
+
- AudiobookIngestor: Audiobook/media file processing
|
| 8 |
+
- URLIngestor: Web page and PDF ingestion
|
| 9 |
+
- KnowledgeGraph: Lightweight entity/relationship graph
|
| 10 |
+
- RAGRetriever: ChromaDB-backed semantic retrieval
|
| 11 |
+
- MediaAnalyzer: Batch processing controller
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from eden_os.shared.interfaces import IScholarEngine
|
| 15 |
+
from eden_os.shared.types import IngestionResult, KnowledgeChunk, KnowledgeSummary
|
| 16 |
+
|
| 17 |
+
from loguru import logger
|
| 18 |
+
|
| 19 |
+
from eden_os.scholar.youtube_ingestor import YouTubeIngestor
|
| 20 |
+
from eden_os.scholar.audiobook_ingestor import AudiobookIngestor
|
| 21 |
+
from eden_os.scholar.url_ingestor import URLIngestor
|
| 22 |
+
from eden_os.scholar.knowledge_graph import KnowledgeGraph
|
| 23 |
+
from eden_os.scholar.rag_retriever import RAGRetriever
|
| 24 |
+
from eden_os.scholar.media_analyzer import MediaAnalyzer
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class ScholarEngine(IScholarEngine):
    """
    Agent 7: Knowledge Engine + RAG.

    Composes all scholar sub-modules into a single engine that
    implements the IScholarEngine interface. Handles YouTube videos,
    audiobooks, web pages, PDFs, and provides RAG retrieval for
    the Brain engine.
    """

    def __init__(self, chromadb_path: str | None = None):
        """
        Initialize the Scholar Engine with all sub-modules.

        Args:
            chromadb_path: Path for ChromaDB persistent storage.
                Defaults to ~/EDEN-OS/data/chromadb.
        """
        logger.info("Initializing Scholar Engine (Agent 7)...")

        self.youtube = YouTubeIngestor()
        self.audiobook = AudiobookIngestor()
        self.url = URLIngestor()
        self.knowledge_graph = KnowledgeGraph()
        self.rag = RAGRetriever(db_path=chromadb_path)
        self.analyzer = MediaAnalyzer()

        logger.info("Scholar Engine initialized.")

    def _index_chunks(self, chunks: list[KnowledgeChunk], label: str) -> None:
        """Embed chunks into RAG storage and the knowledge graph.

        Shared post-ingestion step for every source type; a no-op when
        the ingestor produced no chunks (e.g. ingestion failed).

        Args:
            chunks: knowledge chunks produced by an ingestor.
            label: human-readable source label used in the log message.
        """
        if chunks:
            self.rag.add_chunks(chunks)
            self.knowledge_graph.add_entities(chunks)
            logger.info(
                f"{label} ingestion complete: {len(chunks)} chunks embedded"
            )

    async def ingest_youtube(self, url: str) -> IngestionResult:
        """Ingest YouTube video: download, transcribe, chunk, embed."""
        logger.info(f"Scholar: ingesting YouTube video: {url}")
        chunks, result = await self.youtube.ingest(url)
        self._index_chunks(chunks, "YouTube")
        return result

    async def ingest_audiobook(self, file_path: str) -> IngestionResult:
        """Ingest audiobook: transcribe, chunk by topic, embed."""
        logger.info(f"Scholar: ingesting audiobook: {file_path}")
        chunks, result = await self.audiobook.ingest(file_path)
        self._index_chunks(chunks, "Audiobook")
        return result

    async def ingest_url(self, url: str) -> IngestionResult:
        """Ingest web URL or PDF: extract, chunk, embed."""
        logger.info(f"Scholar: ingesting URL: {url}")
        chunks, result = await self.url.ingest(url)
        self._index_chunks(chunks, "URL")
        return result

    async def retrieve(
        self, query: str, top_k: int = 5
    ) -> list[KnowledgeChunk]:
        """Retrieve relevant knowledge chunks for a query using hybrid search."""
        logger.debug(f"Scholar: retrieving for query: '{query[:80]}...'")
        return self.rag.retrieve(query, top_k=top_k)

    async def analyze_all(self) -> KnowledgeSummary:
        """Batch process all pending ingestion jobs."""
        logger.info("Scholar: running batch analysis on all pending jobs...")
        return await self.analyzer.analyze_all(
            youtube_ingestor=self.youtube,
            audiobook_ingestor=self.audiobook,
            url_ingestor=self.url,
            rag_retriever=self.rag,
            knowledge_graph=self.knowledge_graph,
        )

    async def get_knowledge_summary(self) -> KnowledgeSummary:
        """Get summary of all ingested knowledge, including graph stats."""
        summary = self.analyzer.get_summary(rag_retriever=self.rag)
        graph_summary = self.knowledge_graph.get_summary()
        # Graph stats are attached under a reserved key so they cannot
        # collide with a real source name.
        summary.sources["_knowledge_graph"] = {
            "entities": graph_summary["total_entities"],
            "relationships": graph_summary["total_relationships"],
        }
        return summary

    def queue_job(self, source_type: str, source: str) -> str:
        """Queue a job for batch processing via analyze_all(). Returns job_id."""
        return self.analyzer.add_job(source_type, source)


__all__ = ["ScholarEngine"]
|
eden_os/scholar/audiobook_ingestor.py
ADDED
|
@@ -0,0 +1,299 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS — Scholar Engine: Audiobook Ingestor
|
| 3 |
+
Processes MP3/WAV/M4A files via Whisper with semantic chunking
|
| 4 |
+
by topic boundaries (long pauses and topic shifts).
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import uuid
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
from loguru import logger
|
| 12 |
+
|
| 13 |
+
from eden_os.shared.types import KnowledgeChunk, IngestionResult
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class AudiobookIngestor:
    """Ingests audiobook/media files: transcribe in segments, semantic chunking."""

    # Supported audio formats
    SUPPORTED_FORMATS = {".mp3", ".wav", ".m4a", ".flac", ".ogg", ".wma"}

    # Segment duration in seconds for processing long audio
    SEGMENT_DURATION = 600  # 10 minutes per segment

    # Pause threshold in seconds to detect topic boundaries
    PAUSE_THRESHOLD = 2.0

    # Chunk sizing: chunks above MAX_CHUNK_CHARS are split into
    # sub-chunks of roughly SUB_CHUNK_CHARS characters.
    MAX_CHUNK_CHARS = 1000
    SUB_CHUNK_CHARS = 400

    def __init__(self):
        # Whisper model is loaded lazily on first transcription.
        self._whisper_model = None

    def _get_whisper_model(self):
        """Lazy-load and cache the Whisper model."""
        if self._whisper_model is None:
            import whisper
            logger.info("Loading Whisper model (base) for audiobook transcription...")
            self._whisper_model = whisper.load_model("base")
            logger.info("Whisper model loaded.")
        return self._whisper_model

    def _validate_file(self, file_path: str) -> str | None:
        """Validate file exists and is a supported format.

        Returns an error string describing the problem, or None when valid.
        """
        if not os.path.exists(file_path):
            return f"File not found: {file_path}"

        ext = Path(file_path).suffix.lower()
        if ext not in self.SUPPORTED_FORMATS:
            return f"Unsupported format '{ext}'. Supported: {', '.join(sorted(self.SUPPORTED_FORMATS))}"

        return None

    def _get_audio_duration(self, file_path: str) -> float:
        """Get audio duration in seconds; returns 0.0 when it cannot be determined."""
        try:
            import wave

            # WAV headers give the duration cheaply without decoding.
            if file_path.lower().endswith(".wav"):
                with wave.open(file_path, "r") as wf:
                    frames = wf.getnframes()
                    rate = wf.getframerate()
                    return frames / float(rate)
        except Exception:
            pass

        # Fallback: decode via whisper's loader and count samples.
        try:
            import whisper
            audio = whisper.load_audio(file_path)
            return len(audio) / 16000.0  # whisper resamples to 16kHz
        except Exception:
            return 0.0

    def _transcribe_full(self, file_path: str) -> dict:
        """Transcribe an audio file with Whisper.

        Whisper handles arbitrarily long audio internally with 30-second
        windows, so no explicit pre-segmentation is needed here.
        """
        model = self._get_whisper_model()
        logger.info(f"Transcribing audiobook: {file_path}")

        result = model.transcribe(
            file_path,
            verbose=False,
            condition_on_previous_text=True,
        )
        seg_count = len(result.get("segments", []))
        logger.info(f"Transcription complete: {seg_count} segments")
        return result

    def _detect_topic_boundaries(self, segments: list[dict]) -> list[int]:
        """
        Detect topic boundaries by finding long pauses between segments
        and significant vocabulary shifts.
        Returns list of segment indices where topic boundaries occur.
        """
        boundaries: list[int] = []

        for i in range(1, len(segments)):
            prev_end = segments[i - 1].get("end", 0.0)
            curr_start = segments[i].get("start", 0.0)
            gap = curr_start - prev_end

            # A long pause indicates a topic boundary.
            if gap >= self.PAUSE_THRESHOLD:
                boundaries.append(i)
                continue

            # Check for a topic shift via vocabulary change (Jaccard
            # similarity of the two segments' word sets).
            prev_words = set(segments[i - 1].get("text", "").lower().split())
            curr_words = set(segments[i].get("text", "").lower().split())

            if prev_words and curr_words:
                overlap = len(prev_words & curr_words)
                total = max(len(prev_words | curr_words), 1)
                similarity = overlap / total

                # Very low overlap on a non-trivial segment suggests a shift.
                if similarity < 0.05 and len(curr_words) > 3:
                    boundaries.append(i)

        return boundaries

    def _semantic_chunk(
        self, segments: list[dict], file_path: str
    ) -> list[KnowledgeChunk]:
        """
        Group segments into semantic chunks based on topic boundaries.
        Falls back to size-based chunking if no boundaries detected.
        """
        if not segments:
            return []

        boundaries = self._detect_topic_boundaries(segments)
        logger.info(f"Detected {len(boundaries)} topic boundaries")

        # Bracket the boundary indices with the start and end of the list.
        split_points = [0] + boundaries + [len(segments)]

        chunks: list[KnowledgeChunk] = []
        chapter_num = 0

        for i in range(len(split_points) - 1):
            start_idx = split_points[i]
            end_idx = split_points[i + 1]

            group = segments[start_idx:end_idx]
            if not group:
                continue

            text = " ".join(seg.get("text", "").strip() for seg in group).strip()
            if not text:
                continue

            # Oversized chunks are split further at sentence boundaries.
            if len(text) > self.MAX_CHUNK_CHARS:
                sub_chunks = self._split_long_chunk(text, group, file_path, chapter_num)
                chunks.extend(sub_chunks)
                chapter_num += len(sub_chunks)
            else:
                chapter_num += 1
                start_sec = group[0].get("start", 0.0)
                end_sec = group[-1].get("end", 0.0)

                chunks.append(
                    KnowledgeChunk(
                        text=text,
                        source_type="audiobook",
                        source_id=file_path,
                        timestamp=self._format_timestamp(start_sec),
                        chapter=f"Section {chapter_num}",
                        metadata={
                            "start_seconds": start_sec,
                            "end_seconds": end_sec,
                            "segment_count": len(group),
                        },
                    )
                )

        return chunks

    def _split_long_chunk(
        self, text: str, segments: list[dict], file_path: str, base_chapter: int
    ) -> list[KnowledgeChunk]:
        """Split a long text chunk into smaller pieces at sentence boundaries.

        Sub-chunk timestamps are estimated by linear interpolation across
        the parent chunk's time span, proportional to the cumulative
        number of characters emitted before each sub-chunk. (The previous
        formula used only the current sub-chunk's size, which produced
        non-monotonic timestamps clustered near the end of the span.)
        """
        import re

        sentences = re.split(r"(?<=[.!?])\s+", text)
        chunks: list[KnowledgeChunk] = []
        current = ""
        sub_idx = 0

        total_chars = max(len(text), 1)
        start_sec = segments[0].get("start", 0.0) if segments else 0.0
        end_sec = segments[-1].get("end", 0.0) if segments else 0.0
        duration = end_sec - start_sec
        consumed_chars = 0  # characters already emitted in earlier sub-chunks

        for sentence in sentences:
            current += " " + sentence
            if len(current) >= self.SUB_CHUNK_CHARS:
                sub_idx += 1
                # Interpolate the sub-chunk start from its character offset.
                progress = min(consumed_chars / total_chars, 1.0)
                chunk_start = start_sec + duration * progress
                consumed_chars += len(current)

                chunks.append(
                    KnowledgeChunk(
                        text=current.strip(),
                        source_type="audiobook",
                        source_id=file_path,
                        timestamp=self._format_timestamp(chunk_start),
                        chapter=f"Section {base_chapter + sub_idx}",
                        metadata={
                            "start_seconds": chunk_start,
                            "estimated_timestamp": True,
                        },
                    )
                )
                current = ""

        # Flush any trailing partial sub-chunk.
        if current.strip():
            sub_idx += 1
            progress = min(consumed_chars / total_chars, 1.0)
            chunk_start = start_sec + duration * progress
            chunks.append(
                KnowledgeChunk(
                    text=current.strip(),
                    source_type="audiobook",
                    source_id=file_path,
                    timestamp=self._format_timestamp(chunk_start),
                    chapter=f"Section {base_chapter + sub_idx}",
                    metadata={"start_seconds": chunk_start, "estimated_timestamp": True},
                )
            )

        return chunks

    @staticmethod
    def _format_timestamp(seconds: float) -> str:
        """Convert seconds to HH:MM:SS or MM:SS format."""
        total = int(seconds)
        h = total // 3600
        m = (total % 3600) // 60
        s = total % 60
        if h > 0:
            return f"{h}:{m:02d}:{s:02d}"
        return f"{m}:{s:02d}"

    async def ingest(
        self, file_path: str
    ) -> tuple[list[KnowledgeChunk], IngestionResult]:
        """
        Full audiobook ingestion pipeline:
        1. Validate file
        2. Transcribe via Whisper
        3. Detect topic boundaries
        4. Semantic chunking
        Returns (chunks, result).
        """
        job_id = uuid.uuid4().hex[:12]
        logger.info(f"[{job_id}] Starting audiobook ingestion: {file_path}")

        error = self._validate_file(file_path)
        if error:
            logger.warning(f"[{job_id}] Validation failed: {error}")
            return [], IngestionResult(
                job_id=job_id,
                source_type="audiobook",
                chunks_created=0,
                status="failed",
                error=error,
            )

        try:
            result = self._transcribe_full(file_path)
        except Exception as e:
            logger.error(f"[{job_id}] Transcription failed: {e}")
            return [], IngestionResult(
                job_id=job_id,
                source_type="audiobook",
                chunks_created=0,
                status="failed",
                error=f"Transcription failed: {str(e)}",
            )

        segments = result.get("segments", [])
        if not segments:
            # Deliberately "completed" rather than "failed": the pipeline
            # ran, there was simply nothing to transcribe.
            logger.warning(f"[{job_id}] No speech detected")
            return [], IngestionResult(
                job_id=job_id,
                source_type="audiobook",
                chunks_created=0,
                status="completed",
                error="No speech detected in audio file",
            )

        chunks = self._semantic_chunk(segments, file_path)
        logger.info(f"[{job_id}] Created {len(chunks)} knowledge chunks from audiobook")

        return chunks, IngestionResult(
            job_id=job_id,
            source_type="audiobook",
            chunks_created=len(chunks),
            status="completed",
        )
|
eden_os/scholar/knowledge_graph.py
ADDED
|
@@ -0,0 +1,305 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS — Scholar Engine: Knowledge Graph
|
| 3 |
+
Lightweight knowledge graph using regex + keyword extraction.
|
| 4 |
+
Stores entities and relationships extracted from ingested content.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import re
|
| 8 |
+
from collections import defaultdict
|
| 9 |
+
from dataclasses import dataclass, field
|
| 10 |
+
|
| 11 |
+
from loguru import logger
|
| 12 |
+
|
| 13 |
+
from eden_os.shared.types import KnowledgeChunk
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@dataclass
class Entity:
    """A named entity in the knowledge graph.

    Entities are discovered heuristically by KnowledgeGraph.add_entities
    (capitalized multi-word phrases and frequent keywords) and accumulate
    mention counts, originating sources, and short context excerpts as
    more chunks are ingested.
    """
    name: str  # display form as it first appeared in the text
    entity_type: str  # "person", "product", "concept", "organization", "technology"
    mentions: int = 0  # how many times this entity has been seen across chunks
    sources: list[str] = field(default_factory=list)  # source_ids of chunks that mention it
    context_snippets: list[str] = field(default_factory=list)  # text excerpts (capped at 5 by add_entities)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@dataclass
class Relationship:
    """A relationship (edge) between two entities.

    Treated as undirected by KnowledgeGraph._add_relationship: an edge
    between the same pair of names (in either order) with the same
    relation_type is strengthened (weight += 1.0) rather than duplicated.
    """
    source: str  # normalized name of one endpoint entity
    target: str  # normalized name of the other endpoint entity
    relation_type: str  # "mentions", "uses", "related_to", "created_by", "part_of"
    weight: float = 1.0  # co-occurrence strength; incremented on repeat sightings
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class KnowledgeGraph:
    """
    Lightweight knowledge graph for extracted entities and relationships.
    Uses regex and keyword patterns for entity extraction — no ML models.

    Entities come from two heuristics: multi-word capitalized phrases
    (likely proper nouns) and high-frequency long keywords. Entities that
    co-occur within the same chunk are linked with weighted "related_to"
    edges.
    """

    # Context words suggesting a nearby capitalized phrase names a person.
    PERSON_INDICATORS = {
        "dr.", "dr ", "professor", "prof.", "prof ", "ceo", "founder",
        "author", "researcher", "scientist", "engineer", "mr.", "mrs.",
        "ms.", "said", "argues", "explains", "wrote", "discovered",
    }

    # Context words suggesting a product or software artifact.
    PRODUCT_INDICATORS = {
        "app", "platform", "software", "tool", "framework", "library",
        "product", "service", "api", "model", "system", "engine",
        "version", "v1", "v2", "release",
    }

    # Context words suggesting an abstract concept or methodology.
    CONCEPT_INDICATORS = {
        "theory", "method", "approach", "technique", "algorithm",
        "protocol", "principle", "paradigm", "architecture", "pattern",
        "strategy", "process", "mechanism", "framework",
    }

    # Common stopwords filtered out of keyword extraction.
    STOPWORDS = {
        "the", "a", "an", "is", "are", "was", "were", "be", "been",
        "being", "have", "has", "had", "do", "does", "did", "will",
        "would", "could", "should", "may", "might", "shall", "can",
        "this", "that", "these", "those", "i", "you", "he", "she",
        "it", "we", "they", "me", "him", "her", "us", "them", "my",
        "your", "his", "its", "our", "their", "what", "which", "who",
        "whom", "when", "where", "why", "how", "all", "each", "every",
        "both", "few", "more", "most", "other", "some", "such", "no",
        "not", "only", "own", "same", "so", "than", "too", "very",
        "just", "because", "as", "until", "while", "of", "at", "by",
        "for", "with", "about", "against", "between", "through",
        "during", "before", "after", "above", "below", "to", "from",
        "up", "down", "in", "out", "on", "off", "over", "under",
        "again", "further", "then", "once", "here", "there", "and",
        "but", "or", "nor", "if", "also", "into", "however", "new",
    }

    def __init__(self):
        self._entities: dict[str, "Entity"] = {}  # normalized name -> Entity
        self._relationships: list["Relationship"] = []  # all edges, in insertion order
        # Symmetric edge index for O(1) duplicate detection:
        # (min(a, b), max(a, b), relation_type) -> Relationship.
        self._rel_index: dict[tuple, "Relationship"] = {}
        self._adjacency: dict[str, set[str]] = defaultdict(set)  # undirected neighbor map

    @property
    def entity_count(self) -> int:
        """Number of distinct entities currently in the graph."""
        return len(self._entities)

    @property
    def relationship_count(self) -> int:
        """Number of distinct relationship edges currently in the graph."""
        return len(self._relationships)

    def _normalize_name(self, name: str) -> str:
        """Normalize entity name for consistent lookup."""
        return name.strip().lower()

    def _extract_capitalized_phrases(self, text: str) -> list[str]:
        """Extract multi-word capitalized phrases (likely proper nouns)."""
        # Match sequences of 2+ capitalized words, e.g. "Alan Turing".
        pattern = r"\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\b"
        return re.findall(pattern, text)

    def _extract_keywords(self, text: str) -> list[str]:
        """Extract up to 20 significant keywords, most frequent first."""
        words = re.findall(r"\b[a-zA-Z]{3,}\b", text.lower())
        # Count frequency while filtering stopwords in a single pass.
        freq: dict[str, int] = defaultdict(int)
        for w in words:
            if w not in self.STOPWORDS:
                freq[w] += 1
        sorted_kw = sorted(freq.items(), key=lambda x: x[1], reverse=True)
        return [kw for kw, _ in sorted_kw[:20]]

    def _classify_entity(self, name: str, context: str) -> str:
        """Classify an entity based on indicator words in its context."""
        lower_context = context.lower()

        # Indicator checks only matter when the name itself appears in the
        # lowercased context; that test is loop-invariant, so hoist it.
        if name.lower() in lower_context:
            for indicators, label in (
                (self.PERSON_INDICATORS, "person"),
                (self.PRODUCT_INDICATORS, "product"),
                (self.CONCEPT_INDICATORS, "concept"),
            ):
                if any(ind in lower_context for ind in indicators):
                    return label

        # Default: a capitalized multi-word phrase is likely a person or org.
        if len(name.split()) >= 2:
            return "person"
        return "concept"

    def add_entities(self, chunks: list["KnowledgeChunk"]) -> int:
        """
        Extract and add entities from knowledge chunks.

        Capitalized phrases become typed entities, top keywords become
        "concept" entities, and entities co-occurring within a chunk are
        linked with "related_to" edges.
        Returns count of new entities added.
        """
        new_count = 0

        for chunk in chunks:
            text = chunk.text
            source = chunk.source_id
            snippet = text[:200]

            # Capitalized phrases are the primary entity candidates.
            phrases = self._extract_capitalized_phrases(text)

            for phrase in phrases:
                key = self._normalize_name(phrase)
                entity = self._entities.get(key)
                if entity is not None:
                    entity.mentions += 1
                    if source not in entity.sources:
                        entity.sources.append(source)
                    # Keep at most 5 context snippets per entity.
                    if len(entity.context_snippets) < 5:
                        entity.context_snippets.append(snippet)
                else:
                    # Classification is only needed for brand-new entities;
                    # the original computed it for known keys and discarded it.
                    self._entities[key] = Entity(
                        name=phrase,
                        entity_type=self._classify_entity(phrase, text),
                        mentions=1,
                        sources=[source],
                        context_snippets=[snippet],
                    )
                    new_count += 1

            # Frequent long keywords become additional "concept" entities.
            for kw in self._extract_keywords(text)[:5]:  # top 5 per chunk
                key = self._normalize_name(kw)
                if key not in self._entities and len(kw) > 4:
                    self._entities[key] = Entity(
                        name=kw,
                        entity_type="concept",
                        mentions=1,
                        sources=[source],
                        context_snippets=[snippet],
                    )
                    new_count += 1
                elif key in self._entities:
                    self._entities[key].mentions += 1

        # Link all entities that co-occur within the same chunk.
            chunk_entities = [self._normalize_name(p) for p in phrases]
            for i, e1 in enumerate(chunk_entities):
                for e2 in chunk_entities[i + 1:]:
                    if e1 != e2:
                        self._add_relationship(e1, e2, "related_to")

        logger.info(
            f"Knowledge graph updated: {new_count} new entities, "
            f"total {self.entity_count} entities, {self.relationship_count} relationships"
        )
        return new_count

    def _add_relationship(
        self, source: str, target: str, relation_type: str
    ) -> None:
        """Add or strengthen a relationship between two entities.

        Edges are undirected for lookup purposes: (a, b) and (b, a) with the
        same relation_type refer to the same edge. Uses a symmetric index for
        O(1) duplicate detection instead of scanning the whole edge list
        (the original scan made bulk ingestion quadratic in edge count).
        """
        key = (*sorted((source, target)), relation_type)
        existing = self._rel_index.get(key)
        if existing is not None:
            existing.weight += 1.0
            return

        rel = Relationship(
            source=source,
            target=target,
            relation_type=relation_type,
        )
        self._relationships.append(rel)
        self._rel_index[key] = rel
        self._adjacency[source].add(target)
        self._adjacency[target].add(source)

    def query_related(
        self, query: str, max_depth: int = 2, max_results: int = 10
    ) -> list[dict]:
        """
        Find entities and relationships related to a query.
        Uses BFS traversal up to max_depth from matching entities.
        """
        query_lower = query.lower()
        query_terms = set(query_lower.split())

        # Direct matches: substring containment either way, or shared words.
        matches: list[str] = []
        for key in self._entities:
            if query_lower in key or key in query_lower:
                matches.append(key)
            elif query_terms & set(key.split()):
                matches.append(key)

        if not matches:
            # Fuzzy fallback: any query term (>3 chars) inside an entity name.
            for key in self._entities:
                if any(len(term) > 3 and term in key for term in query_terms):
                    matches.append(key)

        # BFS from matches; an index head avoids O(n) list.pop(0) per node.
        visited: set[str] = set()
        results: list[dict] = []
        queue: list[tuple[str, int]] = [(m, 0) for m in matches]
        head = 0

        while head < len(queue) and len(results) < max_results:
            current, depth = queue[head]
            head += 1
            if current in visited:
                continue
            visited.add(current)

            if current in self._entities:
                entity = self._entities[current]
                results.append(
                    {
                        "name": entity.name,
                        "type": entity.entity_type,
                        "mentions": entity.mentions,
                        "sources": entity.sources,
                        "depth": depth,
                    }
                )

            if depth < max_depth:
                # .get avoids inserting empty sets into the defaultdict.
                for neighbor in self._adjacency.get(current, set()):
                    if neighbor not in visited:
                        queue.append((neighbor, depth + 1))

        return results

    def get_summary(self) -> dict:
        """Get a summary of the knowledge graph (counts, types, top entities)."""
        type_counts: dict[str, int] = defaultdict(int)
        for entity in self._entities.values():
            type_counts[entity.entity_type] += 1

        # Ten most-mentioned entities.
        top_entities = sorted(
            self._entities.values(),
            key=lambda e: e.mentions,
            reverse=True,
        )[:10]

        return {
            "total_entities": self.entity_count,
            "total_relationships": self.relationship_count,
            "entity_types": dict(type_counts),
            "top_entities": [
                {"name": e.name, "type": e.entity_type, "mentions": e.mentions}
                for e in top_entities
            ],
        }
|
eden_os/scholar/media_analyzer.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS — Scholar Engine: Media Analyzer
|
| 3 |
+
Batch processing controller for all ingestion jobs.
|
| 4 |
+
Tracks jobs, processes pending, returns summaries.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import uuid
|
| 8 |
+
from dataclasses import dataclass, field
|
| 9 |
+
from enum import Enum
|
| 10 |
+
|
| 11 |
+
from loguru import logger
|
| 12 |
+
|
| 13 |
+
from eden_os.shared.types import KnowledgeChunk, IngestionResult, KnowledgeSummary
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class JobStatus(Enum):
    """Lifecycle states for an IngestionJob."""
    PENDING = "pending"        # registered, not yet picked up by analyze_all()
    PROCESSING = "processing"  # currently being ingested
    COMPLETED = "completed"    # finished successfully; chunks/result populated
    FAILED = "failed"          # ingestion raised; error message populated
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@dataclass
class IngestionJob:
    """Tracks a single ingestion job through its lifecycle."""
    job_id: str  # 12-hex-char identifier assigned at registration
    source_type: str  # "youtube", "audiobook", "url"
    source: str  # URL or file path
    status: JobStatus = JobStatus.PENDING  # advances PENDING -> PROCESSING -> COMPLETED/FAILED
    chunks: list[KnowledgeChunk] = field(default_factory=list)  # filled in on completion
    result: IngestionResult | None = None  # filled in on completion
    error: str | None = None  # filled in on failure
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class MediaAnalyzer:
    """
    Batch processing controller for media ingestion.

    Maintains a registry of IngestionJob records keyed by job_id and offers
    analyze_all() to drain every pending job in one pass, feeding resulting
    chunks into the RAG store and the knowledge graph.
    """

    def __init__(self):
        # Every job ever registered, keyed by its 12-char hex id.
        self._jobs: dict[str, IngestionJob] = {}
        # One IngestionResult per finished (completed or failed) job.
        self._completed_results: list[IngestionResult] = []

    def add_job(self, source_type: str, source: str) -> str:
        """Register a new ingestion job. Returns job_id."""
        job_id = uuid.uuid4().hex[:12]
        self._jobs[job_id] = IngestionJob(job_id, source_type, source)
        logger.info(f"Registered ingestion job {job_id}: {source_type} -> {source}")
        return job_id

    def get_pending_jobs(self) -> list[IngestionJob]:
        """Return every job that has not yet been processed."""
        return [j for j in self._jobs.values() if j.status == JobStatus.PENDING]

    def mark_processing(self, job_id: str) -> None:
        """Flag a job as currently being worked on."""
        job = self._jobs.get(job_id)
        if job is not None:
            job.status = JobStatus.PROCESSING

    def mark_completed(
        self, job_id: str, chunks: list[KnowledgeChunk], result: IngestionResult
    ) -> None:
        """Record a successful outcome, retaining the job's chunks/result."""
        job = self._jobs.get(job_id)
        if job is None:
            return
        job.status = JobStatus.COMPLETED
        job.chunks = chunks
        job.result = result
        self._completed_results.append(result)

    def mark_failed(self, job_id: str, error: str) -> None:
        """Record a failed outcome with its error message."""
        job = self._jobs.get(job_id)
        if job is None:
            return
        job.status = JobStatus.FAILED
        job.error = error
        self._completed_results.append(
            IngestionResult(
                job_id=job_id,
                source_type=job.source_type,
                chunks_created=0,
                status="failed",
                error=error,
            )
        )

    async def analyze_all(
        self,
        youtube_ingestor,
        audiobook_ingestor,
        url_ingestor,
        rag_retriever,
        knowledge_graph,
    ) -> KnowledgeSummary:
        """
        Process all pending ingestion jobs.
        Routes each job to the appropriate ingestor,
        adds chunks to RAG store and knowledge graph.
        Returns KnowledgeSummary with totals.
        """
        pending = self.get_pending_jobs()
        if not pending:
            logger.info("No pending jobs to process")
            return self.get_summary(rag_retriever)

        logger.info(f"Processing {len(pending)} pending ingestion jobs...")

        # Dispatch table: source_type -> ingestor instance.
        ingestors = {
            "youtube": youtube_ingestor,
            "audiobook": audiobook_ingestor,
            "url": url_ingestor,
        }

        total_new_chunks = 0
        source_counts: dict[str, int] = {}

        for job in pending:
            self.mark_processing(job.job_id)
            logger.info(f"Processing job {job.job_id}: {job.source_type} -> {job.source}")

            try:
                ingestor = ingestors.get(job.source_type)
                if ingestor is None:
                    raise ValueError(f"Unknown source type: {job.source_type}")

                chunks, result = await ingestor.ingest(job.source)

                if chunks:
                    # Persist embeddings first, then extract graph entities.
                    rag_retriever.add_chunks(chunks)
                    knowledge_graph.add_entities(chunks)

                    total_new_chunks += len(chunks)
                    source_counts[job.source_type] = (
                        source_counts.get(job.source_type, 0) + len(chunks)
                    )

                self.mark_completed(job.job_id, chunks, result)
                logger.info(
                    f"Job {job.job_id} completed: {len(chunks)} chunks created"
                )

            except Exception as e:
                logger.error(f"Job {job.job_id} failed: {str(e)}")
                self.mark_failed(job.job_id, str(e))

        logger.info(
            f"Batch processing complete: {total_new_chunks} new chunks "
            f"across {len(pending)} jobs"
        )

        return self.get_summary(rag_retriever)

    def get_summary(self, rag_retriever=None) -> KnowledgeSummary:
        """Get summary of all ingested knowledge."""
        if rag_retriever:
            total = rag_retriever.get_total_chunks()
            sources = rag_retriever.get_source_counts()
        else:
            # No retriever available: derive totals from completed jobs.
            done = [j for j in self._jobs.values() if j.status == JobStatus.COMPLETED]
            total = sum(len(j.chunks) for j in done)
            sources = {}
            for j in done:
                sources[j.source_type] = sources.get(j.source_type, 0) + len(j.chunks)

        pending_count = len(self.get_pending_jobs())
        if pending_count == 0:
            status = "ready"
        else:
            status = f"processing ({pending_count} pending)"

        return KnowledgeSummary(
            total_chunks=total,
            sources=sources,
            status=status,
        )

    def get_job_status(self, job_id: str) -> dict | None:
        """Get status of a specific job."""
        job = self._jobs.get(job_id)
        if job is None:
            return None
        return {
            "job_id": job.job_id,
            "source_type": job.source_type,
            "source": job.source,
            "status": job.status.value,
            "chunks_created": len(job.chunks),
            "error": job.error,
        }
|
eden_os/scholar/rag_retriever.py
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EDEN OS — Scholar Engine: RAG Retriever
|
| 3 |
+
ChromaDB-backed retrieval with sentence-transformers embeddings.
|
| 4 |
+
Supports hybrid search (semantic + keyword).
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import uuid
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
from loguru import logger
|
| 12 |
+
|
| 13 |
+
from eden_os.shared.types import KnowledgeChunk
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class RAGRetriever:
|
| 17 |
+
"""
|
| 18 |
+
RAG retrieval using ChromaDB persistent storage and
|
| 19 |
+
sentence-transformers (all-MiniLM-L6-v2) embeddings.
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
COLLECTION_NAME = "eden_knowledge"
|
| 23 |
+
DEFAULT_DB_PATH = os.path.expanduser("~/EDEN-OS/data/chromadb")
|
| 24 |
+
|
| 25 |
+
def __init__(self, db_path: str | None = None):
|
| 26 |
+
self._db_path = db_path or self.DEFAULT_DB_PATH
|
| 27 |
+
Path(self._db_path).mkdir(parents=True, exist_ok=True)
|
| 28 |
+
|
| 29 |
+
self._client = None
|
| 30 |
+
self._collection = None
|
| 31 |
+
self._embedding_model = None
|
| 32 |
+
|
| 33 |
+
def _get_embedding_model(self):
|
| 34 |
+
"""Lazy-load sentence-transformers model."""
|
| 35 |
+
if self._embedding_model is None:
|
| 36 |
+
from sentence_transformers import SentenceTransformer
|
| 37 |
+
|
| 38 |
+
logger.info("Loading embedding model: all-MiniLM-L6-v2")
|
| 39 |
+
self._embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
|
| 40 |
+
logger.info("Embedding model loaded.")
|
| 41 |
+
return self._embedding_model
|
| 42 |
+
|
| 43 |
+
def _get_collection(self):
|
| 44 |
+
"""Lazy-init ChromaDB client and collection."""
|
| 45 |
+
if self._collection is None:
|
| 46 |
+
import chromadb
|
| 47 |
+
|
| 48 |
+
logger.info(f"Initializing ChromaDB at: {self._db_path}")
|
| 49 |
+
self._client = chromadb.PersistentClient(path=self._db_path)
|
| 50 |
+
self._collection = self._client.get_or_create_collection(
|
| 51 |
+
name=self.COLLECTION_NAME,
|
| 52 |
+
metadata={"hnsw:space": "cosine"},
|
| 53 |
+
)
|
| 54 |
+
logger.info(
|
| 55 |
+
f"ChromaDB collection '{self.COLLECTION_NAME}' ready "
|
| 56 |
+
f"({self._collection.count()} existing documents)"
|
| 57 |
+
)
|
| 58 |
+
return self._collection
|
| 59 |
+
|
| 60 |
+
def _embed_texts(self, texts: list[str]) -> list[list[float]]:
|
| 61 |
+
"""Generate embeddings for a list of texts."""
|
| 62 |
+
model = self._get_embedding_model()
|
| 63 |
+
embeddings = model.encode(texts, show_progress_bar=False, convert_to_numpy=True)
|
| 64 |
+
return embeddings.tolist()
|
| 65 |
+
|
| 66 |
+
def add_chunks(self, chunks: list[KnowledgeChunk]) -> int:
|
| 67 |
+
"""
|
| 68 |
+
Add knowledge chunks to ChromaDB with embeddings.
|
| 69 |
+
Returns number of chunks added.
|
| 70 |
+
"""
|
| 71 |
+
if not chunks:
|
| 72 |
+
return 0
|
| 73 |
+
|
| 74 |
+
collection = self._get_collection()
|
| 75 |
+
|
| 76 |
+
# Prepare data for ChromaDB
|
| 77 |
+
ids: list[str] = []
|
| 78 |
+
documents: list[str] = []
|
| 79 |
+
metadatas: list[dict] = []
|
| 80 |
+
|
| 81 |
+
for chunk in chunks:
|
| 82 |
+
doc_id = uuid.uuid4().hex
|
| 83 |
+
ids.append(doc_id)
|
| 84 |
+
documents.append(chunk.text)
|
| 85 |
+
metadatas.append(
|
| 86 |
+
{
|
| 87 |
+
"source_type": chunk.source_type,
|
| 88 |
+
"source_id": chunk.source_id,
|
| 89 |
+
"timestamp": chunk.timestamp or "",
|
| 90 |
+
"chapter": chunk.chapter or "",
|
| 91 |
+
**{
|
| 92 |
+
k: str(v)
|
| 93 |
+
for k, v in chunk.metadata.items()
|
| 94 |
+
if isinstance(v, (str, int, float, bool))
|
| 95 |
+
},
|
| 96 |
+
}
|
| 97 |
+
)
|
| 98 |
+
|
| 99 |
+
# Generate embeddings
|
| 100 |
+
logger.info(f"Generating embeddings for {len(documents)} chunks...")
|
| 101 |
+
embeddings = self._embed_texts(documents)
|
| 102 |
+
|
| 103 |
+
# Batch insert (ChromaDB has a batch limit)
|
| 104 |
+
batch_size = 500
|
| 105 |
+
added = 0
|
| 106 |
+
for i in range(0, len(ids), batch_size):
|
| 107 |
+
end = min(i + batch_size, len(ids))
|
| 108 |
+
collection.add(
|
| 109 |
+
ids=ids[i:end],
|
| 110 |
+
documents=documents[i:end],
|
| 111 |
+
metadatas=metadatas[i:end],
|
| 112 |
+
embeddings=embeddings[i:end],
|
| 113 |
+
)
|
| 114 |
+
added += end - i
|
| 115 |
+
|
| 116 |
+
logger.info(f"Added {added} chunks to ChromaDB (total: {collection.count()})")
|
| 117 |
+
return added
|
| 118 |
+
|
| 119 |
+
def retrieve(
|
| 120 |
+
self,
|
| 121 |
+
query: str,
|
| 122 |
+
top_k: int = 5,
|
| 123 |
+
source_type: str | None = None,
|
| 124 |
+
) -> list[KnowledgeChunk]:
|
| 125 |
+
"""
|
| 126 |
+
Retrieve relevant knowledge chunks via hybrid search.
|
| 127 |
+
Combines semantic similarity with keyword matching.
|
| 128 |
+
"""
|
| 129 |
+
collection = self._get_collection()
|
| 130 |
+
|
| 131 |
+
if collection.count() == 0:
|
| 132 |
+
logger.warning("Knowledge base is empty, nothing to retrieve")
|
| 133 |
+
return []
|
| 134 |
+
|
| 135 |
+
# Build where filter for source type if specified
|
| 136 |
+
where_filter = None
|
| 137 |
+
if source_type:
|
| 138 |
+
where_filter = {"source_type": source_type}
|
| 139 |
+
|
| 140 |
+
# Semantic search with embeddings
|
| 141 |
+
query_embedding = self._embed_texts([query])[0]
|
| 142 |
+
|
| 143 |
+
try:
|
| 144 |
+
semantic_results = collection.query(
|
| 145 |
+
query_embeddings=[query_embedding],
|
| 146 |
+
n_results=min(top_k * 2, collection.count()),
|
| 147 |
+
where=where_filter,
|
| 148 |
+
include=["documents", "metadatas", "distances"],
|
| 149 |
+
)
|
| 150 |
+
except Exception as e:
|
| 151 |
+
logger.error(f"Semantic search failed: {e}")
|
| 152 |
+
semantic_results = {"ids": [[]], "documents": [[]], "metadatas": [[]], "distances": [[]]}
|
| 153 |
+
|
| 154 |
+
# Keyword search (ChromaDB document contains filter)
|
| 155 |
+
query_words = [w for w in query.lower().split() if len(w) > 3]
|
| 156 |
+
keyword_results_ids: set[str] = set()
|
| 157 |
+
|
| 158 |
+
if query_words:
|
| 159 |
+
try:
|
| 160 |
+
# Search for documents containing key query terms
|
| 161 |
+
keyword_filter = {
|
| 162 |
+
"$or": [
|
| 163 |
+
{"source_type": {"$in": ["youtube", "audiobook", "url"]}}
|
| 164 |
+
]
|
| 165 |
+
}
|
| 166 |
+
kw_results = collection.query(
|
| 167 |
+
query_texts=[query],
|
| 168 |
+
n_results=min(top_k, collection.count()),
|
| 169 |
+
where=where_filter,
|
| 170 |
+
include=["documents", "metadatas", "distances"],
|
| 171 |
+
)
|
| 172 |
+
if kw_results["ids"] and kw_results["ids"][0]:
|
| 173 |
+
keyword_results_ids = set(kw_results["ids"][0])
|
| 174 |
+
except Exception:
|
| 175 |
+
pass # Keyword search is supplementary
|
| 176 |
+
|
| 177 |
+
# Merge and deduplicate results, prioritizing semantic matches
|
| 178 |
+
seen_ids: set[str] = set()
|
| 179 |
+
chunks: list[KnowledgeChunk] = []
|
| 180 |
+
|
| 181 |
+
result_ids = semantic_results["ids"][0] if semantic_results["ids"] else []
|
| 182 |
+
result_docs = semantic_results["documents"][0] if semantic_results["documents"] else []
|
| 183 |
+
result_metas = semantic_results["metadatas"][0] if semantic_results["metadatas"] else []
|
| 184 |
+
result_dists = semantic_results["distances"][0] if semantic_results["distances"] else []
|
| 185 |
+
|
| 186 |
+
for idx, doc_id in enumerate(result_ids):
|
| 187 |
+
if doc_id in seen_ids:
|
| 188 |
+
continue
|
| 189 |
+
seen_ids.add(doc_id)
|
| 190 |
+
|
| 191 |
+
meta = result_metas[idx] if idx < len(result_metas) else {}
|
| 192 |
+
doc = result_docs[idx] if idx < len(result_docs) else ""
|
| 193 |
+
distance = result_dists[idx] if idx < len(result_dists) else 1.0
|
| 194 |
+
|
| 195 |
+
# Boost score if also found in keyword search
|
| 196 |
+
is_keyword_match = doc_id in keyword_results_ids
|
| 197 |
+
|
| 198 |
+
chunk = KnowledgeChunk(
|
| 199 |
+
text=doc,
|
| 200 |
+
source_type=meta.get("source_type", "unknown"),
|
| 201 |
+
source_id=meta.get("source_id", ""),
|
| 202 |
+
timestamp=meta.get("timestamp") or None,
|
| 203 |
+
chapter=meta.get("chapter") or None,
|
| 204 |
+
metadata={
|
| 205 |
+
"relevance_score": 1.0 - distance,
|
| 206 |
+
"keyword_match": is_keyword_match,
|
| 207 |
+
},
|
| 208 |
+
)
|
| 209 |
+
chunks.append(chunk)
|
| 210 |
+
|
| 211 |
+
if len(chunks) >= top_k:
|
| 212 |
+
break
|
| 213 |
+
|
| 214 |
+
logger.info(f"Retrieved {len(chunks)} chunks for query: '{query[:50]}...'")
|
| 215 |
+
return chunks
|
| 216 |
+
|
| 217 |
+
def get_total_chunks(self) -> int:
|
| 218 |
+
"""Get total number of chunks in the knowledge base."""
|
| 219 |
+
collection = self._get_collection()
|
| 220 |
+
return collection.count()
|
| 221 |
+
|
| 222 |
+
def get_source_counts(self) -> dict[str, int]:
|
| 223 |
+
"""Get chunk counts by source type."""
|
| 224 |
+
collection = self._get_collection()
|
| 225 |
+
counts: dict[str, int] = {}
|
| 226 |
+
|
| 227 |
+
for source_type in ["youtube", "audiobook", "url"]:
|
| 228 |
+
try:
|
| 229 |
+
results = collection.get(
|
| 230 |
+
where={"source_type": source_type},
|
| 231 |
+
include=[],
|
| 232 |
+
)
|
| 233 |
+
counts[source_type] = len(results["ids"])
|
| 234 |
+
except Exception:
|
| 235 |
+
counts[source_type] = 0
|
| 236 |
+
|
| 237 |
+
return counts
|