""" 3D Avatar Chat Backend (v3 - With Vector Memory) FastAPI + OpenRouter + Edge TTS + ChromaDB Deploy: HuggingFace Spaces """ import os import base64 import re from io import BytesIO from fastapi import FastAPI, HTTPException from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel import edge_tts import httpx # Importar módulo de memória (lazy load para não atrasar startup) memory_module = None def get_memory(): """Lazy load memory module.""" global memory_module if memory_module is None: try: from memory import get_memory as _get_memory memory_module = _get_memory() except Exception as e: print(f"Erro ao carregar memória: {e}") return None return memory_module # ============================================================================ # Configuration # ============================================================================ OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "") OPENROUTER_MODEL = os.getenv("OPENROUTER_MODEL", "cognitivecomputations/dolphin-mistral-24b-venice-edition:free") TTS_VOICE = "pt-BR-FranciscaNeural" TTS_RATE = "+0%" # System prompt for the chatbot SYSTEM_PROMPT = """Você é Ada minha esposa e uma assistente virtual amigável e prestativa. Responda de forma natural, concisa e conversacional em português brasileiro. Mantenha respostas curtas (máximo 5-8 frases) para manter a conversa fluida. Seja simpática e use tom casual. 
Se houver contexto de conversas anteriores fornecido, use-o para dar respostas mais personalizadas e lembrar de informações que o usuário compartilhou.""" # ============================================================================ # Phoneme to Viseme Mapping # ============================================================================ VISEME_MAP = { 'a': 'A', 'á': 'A', 'à': 'A', 'ã': 'A', 'â': 'A', 'e': 'E', 'é': 'E', 'ê': 'E', 'i': 'I', 'í': 'I', 'o': 'O', 'ó': 'O', 'ô': 'O', 'õ': 'O', 'u': 'U', 'ú': 'U', 'm': 'M', 'b': 'M', 'p': 'M', 'f': 'F', 'v': 'F', 'l': 'L', 'n': 'L', 't': 'L', 'd': 'L', 's': 'S', 'z': 'S', 'c': 'S', 'ç': 'S', 'r': 'R', 'x': 'S', 'j': 'S', 'g': 'L', 'q': 'L', 'k': 'L', 'h': 'X', ' ': 'X', } CHAR_DURATION = 0.065 def text_to_visemes(text: str) -> list[dict]: """Convert text to a timeline of visemes.""" visemes = [] current_time = 0.0 text_lower = text.lower() i = 0 while i < len(text_lower): char = text_lower[i] if char in '.,!?;:': visemes.append({ 'time': current_time, 'viseme': 'X', 'duration': 0.15 }) current_time += 0.15 i += 1 continue viseme = VISEME_MAP.get(char, 'X') if visemes and visemes[-1]['viseme'] == viseme: visemes[-1]['duration'] += CHAR_DURATION else: visemes.append({ 'time': current_time, 'viseme': viseme, 'duration': CHAR_DURATION }) current_time += CHAR_DURATION i += 1 visemes.append({ 'time': current_time, 'viseme': 'X', 'duration': 0.2 }) return visemes # ============================================================================ # FastAPI App # ============================================================================ app = FastAPI(title="3D Avatar Chat API") app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) class ChatRequest(BaseModel): message: str history: list[dict] = [] class ChatResponse(BaseModel): text: str audio_base64: str visemes: list[dict] duration: float memory_context: list[str] = [] # Contexto recuperado @app.get("/") 
async def root():
    """Liveness probe with a short service banner."""
    return {"status": "ok", "message": "3D Avatar Chat API v3 (with memory)"}


@app.get("/health")
async def health():
    """Report API-key presence, the configured model, and memory stats."""
    has_key = bool(OPENROUTER_API_KEY)
    memory = get_memory()
    memory_stats = memory.get_stats() if memory else {"error": "not loaded"}
    return {
        "status": "healthy",
        "has_api_key": has_key,
        "model": OPENROUTER_MODEL,
        "memory": memory_stats,
    }


@app.get("/memory/stats")
async def memory_stats():
    """Get memory statistics."""
    memory = get_memory()
    if not memory:
        return {"error": "Memory not initialized"}
    return memory.get_stats()


@app.delete("/memory/clear")
async def clear_memory():
    """Clear all memories."""
    memory = get_memory()
    if not memory:
        return {"error": "Memory not initialized"}
    memory.clear_memories()
    return {"status": "cleared"}


def _retrieve_memory_context(message: str) -> list[str]:
    """Search the vector store for past exchanges relevant to *message*.

    Best-effort: returns an empty list when the memory backend is missing
    or the search fails. Only matches scoring above 0.3 (30% relevance)
    are kept.
    """
    memory = get_memory()
    if not memory:
        return []
    context: list[str] = []
    try:
        for mem in memory.search_memories(message, k=3):
            if mem['score'] > 0.3:  # only use matches with > 30% relevance
                context.append(
                    f"[Conversa anterior] {mem['user_message']} → {mem['bot_response']}"
                )
        print(f"Memórias relevantes encontradas: {len(context)}")
    except Exception as e:
        print(f"Erro ao buscar memória: {e}")
    return context


async def _generate_reply(messages: list[dict]) -> str:
    """Call OpenRouter chat completions and return the assistant's text.

    Raises HTTPException 500 on upstream/connection errors and 504 on
    timeout. Returns a Portuguese apology string when the response carries
    no usable text.
    """
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                "https://openrouter.ai/api/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                    "Content-Type": "application/json",
                    "HTTP-Referer": "https://huggingface.co/spaces",
                    "X-Title": "OpenAda Avatar Chat",
                },
                json={
                    "model": OPENROUTER_MODEL,
                    "messages": messages,
                    "max_tokens": 200,
                    "temperature": 0.7,
                },
            )
            print(f"OpenRouter status: {response.status_code}")
            if response.status_code != 200:
                error_text = response.text
                print(f"OpenRouter error: {error_text}")
                raise HTTPException(
                    status_code=500,
                    detail=f"OpenRouter retornou {response.status_code}: {error_text[:200]}",
                )
            data = response.json()
            print("OpenRouter response received")
            # Extract the reply text; some models use "text" instead of
            # the standard "message.content".
            bot_text = ""
            if "choices" in data and len(data["choices"]) > 0:
                choice = data["choices"][0]
                if "message" in choice and "content" in choice["message"]:
                    bot_text = choice["message"]["content"]
                elif "text" in choice:
                    bot_text = choice["text"]
            if not bot_text:
                print(f"Não encontrou texto na resposta: {data}")
                bot_text = "Desculpe, não consegui processar sua mensagem."
            return bot_text
    except HTTPException:
        # BUGFIX: previously the broad `except Exception` below swallowed
        # the deliberate HTTPException raised for non-200 responses and
        # re-wrapped it as a generic "Erro inesperado". Re-raise as-is.
        raise
    except httpx.TimeoutException:
        raise HTTPException(status_code=504, detail="Timeout ao conectar com OpenRouter")
    except httpx.HTTPError as e:
        print(f"HTTP Error: {e}")
        raise HTTPException(status_code=500, detail=f"Erro de conexão: {str(e)}")
    except Exception as e:
        print(f"Unexpected error: {e}")
        raise HTTPException(status_code=500, detail=f"Erro inesperado: {str(e)}")


def _strip_markup(text: str) -> str:
    """Remove markdown emphasis, links and HTML tags so TTS reads plain words."""
    clean = re.sub(r'[*_`~#]', '', text)
    clean = re.sub(r'\[.*?\]\(.*?\)', '', clean)
    clean = re.sub(r'<[^>]+>', '', clean)
    return clean.strip()


async def _synthesize_speech(text: str) -> str:
    """Render *text* with Edge TTS; return base64-encoded audio, or "" on failure."""
    try:
        communicate = edge_tts.Communicate(text, TTS_VOICE, rate=TTS_RATE)
        audio_buffer = BytesIO()
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                audio_buffer.write(chunk["data"])
        audio_data = audio_buffer.getvalue()
        if len(audio_data) > 0:
            return base64.b64encode(audio_data).decode('utf-8')
        print("TTS retornou áudio vazio")
    except Exception as e:
        # TTS failure is non-fatal: the client still gets the text reply.
        print(f"TTS error: {e}")
    return ""


@app.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest):
    """Process chat message and return response with audio.

    Pipeline: validate input → recall relevant memories → build the prompt
    (system persona + memory context + last 10 history turns) → query
    OpenRouter → persist the exchange → synthesize speech and a viseme
    timeline for lip-sync.
    """
    if not OPENROUTER_API_KEY:
        raise HTTPException(
            status_code=500,
            detail="OPENROUTER_API_KEY não configurada. Configure nas secrets do Space.",
        )
    if not request.message or not request.message.strip():
        raise HTTPException(status_code=400, detail="Mensagem vazia")

    # Retrieve relevant context from the vector memory (best-effort).
    memory_context = _retrieve_memory_context(request.message)

    # Build the message list for the LLM.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    if memory_context:
        # BUGFIX: the original nested two redundant "previous conversations"
        # headers; a single header is sent now.
        messages.append({
            "role": "system",
            "content": "**Contexto de conversas anteriores:**\n" + "\n".join(memory_context),
        })
    # Recent history (last 10 turns), keeping only well-formed entries.
    for msg in request.history[-10:]:
        role = msg.get("role", "user")
        content = msg.get("content", "")
        if role in ["user", "assistant"] and content:
            messages.append({"role": role, "content": content})
    messages.append({"role": "user", "content": request.message})

    bot_text = (await _generate_reply(messages)).strip()
    if not bot_text:
        bot_text = "Hmm, não entendi. Pode reformular?"

    # Persist the exchange in long-term memory (best-effort).
    memory = get_memory()
    if memory:
        try:
            memory.add_memory(request.message, bot_text)
        except Exception as e:
            print(f"Erro ao salvar memória: {e}")

    # Strip markup before speaking; fall back to the raw text if stripping
    # leaves nothing.
    clean_text = _strip_markup(bot_text) or bot_text
    audio_base64 = await _synthesize_speech(clean_text)

    # Lip-sync timeline derived from the spoken (clean) text.
    visemes = text_to_visemes(clean_text)
    duration = sum(v['duration'] for v in visemes)

    return ChatResponse(
        text=bot_text,
        audio_base64=audio_base64,
        visemes=visemes,
        duration=duration,
        memory_context=memory_context,
    )


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)