Madras1 commited on
Commit
19ea065
·
verified ·
1 Parent(s): 6ab3259

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +83 -11
  2. app.py +236 -0
  3. requirements.txt +5 -0
README.md CHANGED
@@ -1,11 +1,83 @@
1
- ---
2
- title: OpenAda
3
- emoji: 🔥
4
- colorFrom: indigo
5
- colorTo: pink
6
- sdk: docker
7
- pinned: false
8
- license: mit
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 3D Avatar Chat - Backend
2
+
3
+ Backend para o sistema de chat com avatar 3D falante.
4
+
5
+ ## Tech Stack
6
+ - **FastAPI** - API REST
7
+ - **OpenRouter** - LLM para respostas
8
+ - **Edge TTS** - Síntese de voz
9
+
10
+ ## Executar Localmente
11
+
12
+ ```bash
13
+ # Instalar dependências
14
+ pip install -r requirements.txt
15
+
16
+ # Configurar variável de ambiente
17
+ export OPENROUTER_API_KEY="sua-chave-aqui"
18
+
19
+ # Rodar servidor
20
+ python app.py
21
+ # ou
22
+ uvicorn app:app --reload --port 7860
23
+ ```
24
+
25
+ ## Deploy no HuggingFace Spaces
26
+
27
+ 1. Criar novo Space (SDK: Docker ou Gradio)
28
+ 2. Adicionar secret `OPENROUTER_API_KEY` nas configurações
29
+ 3. Push do código
30
+
31
+ ### Dockerfile (se usar Docker SDK)
32
+
33
+ ```dockerfile
34
+ FROM python:3.11-slim
35
+
36
+ WORKDIR /app
37
+ COPY requirements.txt .
38
+ RUN pip install --no-cache-dir -r requirements.txt
39
+
40
+ COPY app.py .
41
+
42
+ EXPOSE 7860
43
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
44
+ ```
45
+
46
+ ## Endpoints
47
+
48
+ ### GET /
49
+ Health check básico.
50
+
51
+ ### GET /health
52
+ Status de saúde da API.
53
+
54
+ ### POST /chat
55
+ Processa mensagem e retorna resposta com áudio.
56
+
57
+ **Request:**
58
+ ```json
59
+ {
60
+ "message": "Olá, como você está?",
61
+ "history": []
62
+ }
63
+ ```
64
+
65
+ **Response:**
66
+ ```json
67
+ {
68
+ "text": "Olá! Estou muito bem, obrigada por perguntar!",
69
+ "audio_base64": "//uQxAAAAAANIAAAAAExBTUUzLjEwMFVV...",
70
+ "visemes": [
71
+ {"time": 0.0, "viseme": "O", "duration": 0.065},
72
+ {"time": 0.065, "viseme": "L", "duration": 0.065},
73
+ ...
74
+ ],
75
+ "duration": 3.2
76
+ }
77
+ ```
78
+
79
+ ## Configuração
80
+
81
+ Variáveis de ambiente:
82
+ - `OPENROUTER_API_KEY` - Chave da API OpenRouter (obrigatório)
83
+ - `OPENROUTER_MODEL` - Modelo a usar (default: `google/gemini-2.0-flash-001`)
app.py ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
3D Avatar Chat Backend
FastAPI + OpenRouter + Edge TTS
Deploy: HuggingFace Spaces
"""

import os
import asyncio  # NOTE(review): appears unused in this file — candidate for removal
import base64
import httpx
import re
from io import BytesIO
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import edge_tts

# ============================================================================
# Configuration
# ============================================================================

# OpenRouter credentials and model come from the environment. The key has no
# usable default ("" is falsy), so /chat answers 500 until it is configured —
# the check happens per-request, not at startup.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
OPENROUTER_MODEL = os.getenv("OPENROUTER_MODEL", "google/gemini-2.0-flash-001")
# Brazilian-Portuguese neural voice used for every TTS response.
TTS_VOICE = "pt-BR-FranciscaNeural"
# Speech-rate offset passed to edge-tts; "+0%" means no speed adjustment.
TTS_RATE = "+0%"

# System prompt for the chatbot (Portuguese: friendly assistant, short
# conversational replies of at most 2-3 sentences).
SYSTEM_PROMPT = """Você é uma assistente virtual amigável e prestativa.
Responda de forma natural, concisa e conversacional.
Mantenha respostas curtas (máximo 2-3 frases) para manter a conversa fluida.
"""
# ============================================================================
# Phoneme to Viseme Mapping
# ============================================================================

# Letter → mouth-shape lookup for Portuguese text. Characters missing from
# the table (digits, most symbols) fall back to 'X' (closed/neutral mouth).
VISEME_MAP = {
    # Vowels
    'a': 'A', 'á': 'A', 'à': 'A', 'ã': 'A', 'â': 'A',
    'e': 'E', 'é': 'E', 'ê': 'E',
    'i': 'I', 'í': 'I',
    'o': 'O', 'ó': 'O', 'ô': 'O', 'õ': 'O',
    'u': 'U', 'ú': 'U',
    # Consonants
    'm': 'M', 'b': 'M', 'p': 'M',
    'f': 'F', 'v': 'F',
    'l': 'L', 'n': 'L', 't': 'L', 'd': 'L',
    's': 'S', 'z': 'S', 'c': 'S', 'ç': 'S',
    'r': 'R', 'x': 'S', 'j': 'S', 'g': 'L', 'q': 'L', 'k': 'L',
    'h': 'X',  # Silent
    ' ': 'X',  # Silence
}

# Average duration per character in seconds (approximate for pt-BR)
CHAR_DURATION = 0.065


def text_to_visemes(text: str) -> list[dict]:
    """
    Build a lip-sync timeline for *text*.

    Returns a list of ``{'time': float, 'viseme': str, 'duration': float}``
    entries ordered by start time. Punctuation (.,!?;:) becomes a fixed
    0.15 s closed-mouth pause, consecutive identical mouth shapes are merged
    into a single longer entry, and a 0.2 s closing silence is always
    appended at the end.
    """
    timeline: list[dict] = []
    clock = 0.0

    for ch in text.lower():
        # Punctuation: emit a pause entry directly (never merged backwards).
        if ch in '.,!?;:':
            timeline.append({'time': clock, 'viseme': 'X', 'duration': 0.15})
            clock += 0.15
            continue

        shape = VISEME_MAP.get(ch, 'X')

        if timeline and timeline[-1]['viseme'] == shape:
            # Same mouth shape as the previous entry: stretch it instead of
            # emitting a duplicate frame.
            timeline[-1]['duration'] += CHAR_DURATION
        else:
            timeline.append({
                'time': clock,
                'viseme': shape,
                'duration': CHAR_DURATION,
            })

        clock += CHAR_DURATION

    # Close the mouth once speech ends.
    timeline.append({'time': clock, 'viseme': 'X', 'duration': 0.2})

    return timeline
# ============================================================================
# FastAPI App
# ============================================================================

app = FastAPI(title="3D Avatar Chat API")

# CORS for frontend — intentionally wide open: any origin, method, and header
# is accepted so the static frontend can be hosted anywhere.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


class ChatRequest(BaseModel):
    # Latest user utterance to answer.
    message: str
    # Prior turns as {"role": ..., "content": ...} dicts; defaults to empty.
    # (Pydantic deep-copies field defaults, so the mutable [] is safe here.)
    history: list[dict] = []


class ChatResponse(BaseModel):
    # Raw LLM reply text (before markdown stripping).
    text: str
    # TTS audio of the reply, base64-encoded.
    audio_base64: str
    # Lip-sync timeline: {"time", "viseme", "duration"} entries.
    visemes: list[dict]
    # Estimated playback length in seconds (sum of viseme durations).
    duration: float
@app.get("/")
async def root():
    """Liveness probe: confirms the API is up and identifies the service."""
    payload = {"status": "ok", "message": "3D Avatar Chat API"}
    return payload
@app.get("/health")
async def health():
    """Health-check endpoint for deployment-platform polling."""
    report = {"status": "healthy"}
    return report
@app.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest):
    """
    Process chat message:
    1. Send to OpenRouter for response
    2. Generate audio with Edge TTS
    3. Analyze phonemes for lip sync
    4. Return everything

    Raises HTTPException(500) when the API key is missing, the OpenRouter
    call fails, or TTS synthesis fails.
    """

    # Fail fast with an explicit error instead of an opaque 401 upstream.
    if not OPENROUTER_API_KEY:
        raise HTTPException(status_code=500, detail="OPENROUTER_API_KEY not configured")

    # Build messages for OpenRouter
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]

    # Add history (last 10 messages) — keeps the prompt bounded; malformed
    # entries degrade to role="user" / content="" rather than crashing.
    for msg in request.history[-10:]:
        messages.append({
            "role": msg.get("role", "user"),
            "content": msg.get("content", "")
        })

    # Add current message
    messages.append({"role": "user", "content": request.message})

    # Call OpenRouter (OpenAI-compatible chat-completions endpoint).
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                "https://openrouter.ai/api/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": OPENROUTER_MODEL,
                    "messages": messages,
                    # Short replies by design: the system prompt also caps
                    # responses at 2-3 sentences for conversational flow.
                    "max_tokens": 150,
                    "temperature": 0.7,
                }
            )
            response.raise_for_status()
            data = response.json()

            # NOTE(review): a response missing "choices" would raise KeyError
            # here and surface as an unhandled 500, not the message below.
            bot_text = data["choices"][0]["message"]["content"].strip()

    except httpx.HTTPError as e:
        raise HTTPException(status_code=500, detail=f"OpenRouter error: {str(e)}")

    # Clean text for TTS: strip markdown emphasis markers and [label](url)
    # links. NOTE(review): emojis are NOT removed by these two regexes.
    clean_text = re.sub(r'[*_`~]', '', bot_text)
    clean_text = re.sub(r'\[.*?\]\(.*?\)', '', clean_text)

    # Generate audio with Edge TTS, collecting the streamed chunks in memory.
    try:
        communicate = edge_tts.Communicate(clean_text, TTS_VOICE, rate=TTS_RATE)
        audio_buffer = BytesIO()

        # The stream interleaves audio and metadata chunks; keep audio only.
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                audio_buffer.write(chunk["data"])

        audio_buffer.seek(0)
        audio_base64 = base64.b64encode(audio_buffer.read()).decode('utf-8')

    except Exception as e:
        # Broad catch is deliberate: any edge-tts failure (including an empty
        # clean_text after markdown stripping) becomes a clean 500.
        raise HTTPException(status_code=500, detail=f"TTS error: {str(e)}")

    # Generate viseme timeline from the cleaned text (character heuristic).
    visemes = text_to_visemes(clean_text)

    # Calculate total duration. NOTE(review): this is the heuristic timeline
    # length, not the real audio length — the two can drift apart.
    duration = sum(v['duration'] for v in visemes)

    return ChatResponse(
        text=bot_text,
        audio_base64=audio_base64,
        visemes=visemes,
        duration=duration
    )
# ============================================================================
# Run locally
# ============================================================================

if __name__ == "__main__":
    # Dev entry point; deployed containers run uvicorn via the Docker CMD
    # instead. Port 7860 is the HuggingFace Spaces convention.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ fastapi==0.115.6
2
+ uvicorn[standard]==0.34.0
3
+ edge-tts==6.1.12
4
+ httpx==0.28.1
5
+ python-multipart==0.0.20