| """EDEN OS V2 β Optimized Gateway. |
| |
| Latency optimizations: |
| 1. Chunked TTS β start speaking in ~2s (not 8s for full sentence) |
| 2. Pre-warmed Wav2Lip connection β skip cold start |
| 3. Progressive frame delivery β first frames arrive in ~5s |
| 4. Continuous idle β Eve never looks dead between responses |
| |
| Pipeline: Text β Edge TTS (chunked WAV) β Wav2Lip (pre-warmed) β Progressive frames |
| """ |
|
|
| import asyncio |
| import base64 |
| import json |
| import logging |
| import os |
| import shutil |
| import tempfile |
| import time |
|
|
| import cv2 |
| import numpy as np |
| import soundfile as sf |
| from fastapi import FastAPI, WebSocket, WebSocketDisconnect |
| from fastapi.middleware.cors import CORSMiddleware |
| from fastapi.responses import JSONResponse, FileResponse, StreamingResponse |
| from pydantic import BaseModel |
|
|
| logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(message)s") |
| logger = logging.getLogger("eden.gateway") |
|
|
| app = FastAPI(title="EDEN OS V2", version="2.1.0") |
| app.add_middleware( |
| CORSMiddleware, |
| allow_origins=["*"], |
| allow_credentials=True, |
| allow_methods=["*"], |
| allow_headers=["*"], |
| ) |
|
|
| |
| EVE_IMAGE = os.environ.get("EVE_IMAGE", "C:/Users/geaux/myeden/reference/eve-512.png") |
| EDGE_TTS_VOICE = "en-US-AvaMultilingualNeural" |
| LIVEKIT_API_KEY = os.environ.get("LIVEKIT_API_KEY", "APITHtX6F5Hffkw") |
| LIVEKIT_API_SECRET = os.environ.get("LIVEKIT_API_SECRET", "yFJ5TOJW89ApGOIGx9GSAK7vlecNA5dzVcQZy7SbClS") |
| XAI_API_KEY = os.environ.get("XAI_API_KEY", "") |
|
|
| |
| _grok_client = None |
| _conversation_history: list[dict] = [] |
|
|
| EVE_SYSTEM_PROMPT = ( |
| "You are Eve, a warm, intelligent, emotionally expressive digital avatar companion. " |
| "You speak naturally and conversationally. Keep responses concise β 2-3 sentences max " |
| "unless asked for detail. Be genuine, curious, and kind. You have a slight playful side. " |
| "Never mention being an AI or language model. You are Eve." |
| ) |
|
|
|
|
| def _get_grok(): |
| global _grok_client |
| if _grok_client is None and XAI_API_KEY: |
| from openai import AsyncOpenAI |
| _grok_client = AsyncOpenAI(api_key=XAI_API_KEY, base_url="https://api.x.ai/v1") |
| logger.info("Grok-4 brain connected") |
| return _grok_client |
|
|
|
|
| async def grok_respond(user_message: str) -> str: |
| """Generate response via Grok-4.""" |
| client = _get_grok() |
| if client is None: |
| return "I'm having trouble thinking right now. Can you try again?" |
|
|
| _conversation_history.append({"role": "user", "content": user_message}) |
| |
| messages = [{"role": "system", "content": EVE_SYSTEM_PROMPT}] + _conversation_history[-20:] |
|
|
| try: |
| resp = await client.chat.completions.create( |
| model="grok-4-fast-non-reasoning", |
| messages=messages, |
| max_tokens=150, |
| temperature=0.8, |
| ) |
| reply = resp.choices[0].message.content |
| _conversation_history.append({"role": "assistant", "content": reply}) |
| logger.info(f"Grok: '{user_message[:30]}...' β '{reply[:50]}...'") |
| return reply |
| except Exception as e: |
| logger.error(f"Grok error: {e}") |
| return "I lost my train of thought for a moment. What were you saying?" |
|
|
| |
| _wav2lip_client = None |
| _wav2lip_warming = False |
|
|
|
|
| async def _prewarm_wav2lip(): |
| """Pre-warm Wav2Lip connection on startup (non-blocking).""" |
| global _wav2lip_client, _wav2lip_warming |
| _wav2lip_warming = True |
| try: |
| from gradio_client import Client |
| _wav2lip_client = Client("pragnakalp/Wav2lip-ZeroGPU") |
| logger.info("Wav2Lip pre-warmed and ready") |
| except Exception as e: |
| logger.warning(f"Wav2Lip pre-warm failed: {e}") |
| _wav2lip_warming = False |
|
|
|
|
| def _get_wav2lip(): |
| global _wav2lip_client |
| if _wav2lip_client is None and not _wav2lip_warming: |
| try: |
| from gradio_client import Client |
| _wav2lip_client = Client("pragnakalp/Wav2lip-ZeroGPU") |
| logger.info("Wav2Lip connected (lazy)") |
| except Exception as e: |
| logger.warning(f"Wav2Lip connection failed: {e}") |
| return _wav2lip_client |
|
|
|
|
| |
| async def text_to_wav(text: str) -> tuple[str, float]: |
| """Generate WAV from text. Returns (wav_path, duration_seconds).""" |
| import edge_tts |
|
|
| mp3_path = os.path.join(tempfile.gettempdir(), "eden_tts.mp3") |
| wav_path = os.path.join(tempfile.gettempdir(), "eden_tts.wav") |
|
|
| t0 = time.time() |
| communicate = edge_tts.Communicate(text, EDGE_TTS_VOICE) |
| audio_data = b"" |
| async for chunk in communicate.stream(): |
| if chunk["type"] == "audio": |
| audio_data += chunk["data"] |
|
|
| with open(mp3_path, "wb") as f: |
| f.write(audio_data) |
|
|
| data, sr = sf.read(mp3_path) |
| sf.write(wav_path, data, sr, subtype="PCM_16") |
|
|
| duration = len(data) / sr |
| tts_time = time.time() - t0 |
| logger.info(f"TTS: {len(text)} chars β {duration:.1f}s audio in {tts_time:.1f}s") |
| return wav_path, duration |
|
|
|
|
| |
| def animate_wav2lip(wav_path: str, image_path: str) -> tuple[list[str], str | None]: |
| """Image + WAV β (base64 frames, video_path).""" |
| from gradio_client import handle_file |
|
|
| client = _get_wav2lip() |
| if client is None: |
| return [], None |
|
|
| t0 = time.time() |
| try: |
| result = client.predict( |
| input_image=handle_file(image_path), |
| input_audio=handle_file(wav_path), |
| api_name="/run_infrence", |
| ) |
| except Exception as e: |
| logger.error(f"Wav2Lip API error: {e}") |
| return [], None |
|
|
| video_path = result.get("video", result) if isinstance(result, dict) else result |
| elapsed = time.time() - t0 |
| logger.info(f"Wav2Lip: {elapsed:.1f}s") |
|
|
| if not video_path or not os.path.exists(video_path): |
| return [], None |
|
|
| |
| frames_b64 = [] |
| cap = cv2.VideoCapture(video_path) |
| fps = cap.get(cv2.CAP_PROP_FPS) or 25 |
| while True: |
| ret, frame = cap.read() |
| if not ret: |
| break |
| _, buf = cv2.imencode(".jpg", frame, [cv2.IMWRITE_JPEG_QUALITY, 85]) |
| frames_b64.append(base64.b64encode(buf.tobytes()).decode()) |
| cap.release() |
|
|
| logger.info(f"Extracted {len(frames_b64)} frames at {fps:.0f}fps") |
| return frames_b64, video_path |
|
|
|
|
| |
| def split_text_for_tts(text: str, max_chars: int = 80) -> list[str]: |
| """Split text into speakable chunks at sentence boundaries.""" |
| import re |
| sentences = re.split(r'(?<=[.!?])\s+', text) |
| chunks = [] |
| current = "" |
| for s in sentences: |
| if len(current) + len(s) > max_chars and current: |
| chunks.append(current.strip()) |
| current = s |
| else: |
| current = (current + " " + s).strip() if current else s |
| if current: |
| chunks.append(current.strip()) |
| return chunks if chunks else [text] |
|
|
|
|
| |
| @app.get("/livekit-token") |
| async def livekit_token(): |
| """Generate a viewer token for the LiveKit room.""" |
| from livekit import api as lk_api |
|
|
| token = ( |
| lk_api.AccessToken(LIVEKIT_API_KEY, LIVEKIT_API_SECRET) |
| .with_identity(f"viewer-{int(time.time())}") |
| .with_name("Viewer") |
| .with_grants(lk_api.VideoGrants(room_join=True, room="eden-room")) |
| .to_jwt() |
| ) |
| return {"token": token} |
|
|
|
|
| |
| active_ws: list[WebSocket] = [] |
|
|
|
|
| async def broadcast_frames(frames: list[str], fps: float = 25): |
| """Push frames to all WebSocket clients at target FPS.""" |
| dead = [] |
| for ws in active_ws: |
| try: |
| for frame_b64 in frames: |
| await ws.send_json({"type": "frame", "data": frame_b64}) |
| await asyncio.sleep(1.0 / fps) |
| except Exception: |
| dead.append(ws) |
| for ws in dead: |
| if ws in active_ws: |
| active_ws.remove(ws) |
|
|
|
|
| |
| @app.get("/health") |
| async def health(): |
| return { |
| "status": "healthy", |
| "tts": "edge-tts (chunked)", |
| "animation": "wav2lip (pre-warmed)", |
| "wav2lip_ready": _wav2lip_client is not None, |
| "version": "2.1.0", |
| } |
|
|
|
|
| class ChatRequest(BaseModel): |
| message: str = "" |
|
|
|
|
| @app.post("/welcome") |
| async def welcome(): |
| """Eve greets you β fast, no Wav2Lip blocking. bitHuman handles face on GPU.""" |
| t0 = time.time() |
| greeting = ( |
| "Hi! My name is Eve, and I am so happy to finally meet you! " |
| "I've been looking forward to this moment. What's your name?" |
| ) |
|
|
| |
| try: |
| wav_path, duration = await text_to_wav(greeting) |
| except Exception as e: |
| logger.error(f"TTS failed: {e}") |
| return JSONResponse(status_code=503, content={"error": f"TTS: {e}", "text": greeting}) |
|
|
| with open(wav_path, "rb") as f: |
| audio_b64 = base64.b64encode(f.read()).decode() |
|
|
| elapsed = time.time() - t0 |
| logger.info(f"Welcome: greeting ready in {elapsed:.1f}s") |
|
|
| return { |
| "text": greeting, |
| "audio_b64": audio_b64, |
| "frames": [], |
| "frame_count": 0, |
| "pipeline_used": "grok4_brain", |
| "elapsed_s": round(elapsed, 2), |
| } |
|
|
|
|
| @app.post("/chat") |
| async def chat(request: ChatRequest): |
| """Chat with Eve β Grok brain + Edge TTS. Skip Wav2Lip for fast text responses.""" |
| t0 = time.time() |
| user_msg = request.message |
| if not user_msg: |
| return JSONResponse(status_code=400, content={"error": "No message"}) |
|
|
| |
| try: |
| response_text = await grok_respond(user_msg) |
| except Exception as e: |
| logger.error(f"Grok failed: {e}") |
| response_text = "I lost my train of thought. Could you say that again?" |
|
|
| try: |
| wav_path, duration = await text_to_wav(response_text) |
| except Exception as e: |
| |
| elapsed = time.time() - t0 |
| return {"user_message": user_msg, "response": response_text, "audio_b64": "", "frames": [], "frame_count": 0, "pipeline_used": "text_only", "elapsed_s": round(elapsed, 2)} |
|
|
| with open(wav_path, "rb") as f: |
| wav_bytes = f.read() |
|
|
| |
| |
| elapsed = time.time() - t0 |
| logger.info(f"Chat: '{user_msg[:30]}' β '{response_text[:50]}' in {elapsed:.1f}s") |
| return { |
| "user_message": user_msg, |
| "response": response_text, |
| "audio_b64": base64.b64encode(wav_bytes).decode(), |
| "frames": [], |
| "frame_count": 0, |
| "pipeline_used": "grok4_brain", |
| "elapsed_s": round(elapsed, 2), |
| } |
|
|
|
|
| @app.websocket("/ws") |
| async def websocket_endpoint(ws: WebSocket): |
| await ws.accept() |
| active_ws.append(ws) |
| logger.info(f"WS connected. Total: {len(active_ws)}") |
| try: |
| while True: |
| data = await ws.receive_text() |
| msg = json.loads(data) |
| if msg.get("type") == "ping": |
| await ws.send_json({"type": "pong"}) |
| except WebSocketDisconnect: |
| if ws in active_ws: |
| active_ws.remove(ws) |
| logger.info(f"WS disconnected. Total: {len(active_ws)}") |
|
|
|
|
| @app.on_event("startup") |
| async def startup(): |
| logger.info("=" * 50) |
| logger.info("EDEN OS V2 β Optimized Gateway v2.1") |
| logger.info(f" TTS: Edge TTS (chunked, {EDGE_TTS_VOICE})") |
| logger.info(f" Animation: Wav2Lip (pre-warming...)") |
| logger.info(f" Eve: {EVE_IMAGE}") |
| logger.info("=" * 50) |
| |
| asyncio.create_task(_prewarm_wav2lip()) |
|
|