Spaces:

Abdalkaderdev
/

ORA

Paused

App Files Community

Abdalkaderdev committed on Jan 12

Commit

5058539

1 Parent(s): d2505af

Switch to ElevenLabs TTS for natural voice

Browse files

Files changed (2) hide show

app/ora_server.py +32 -34
frontend/app/voice/page.tsx +28 -20

app/ora_server.py CHANGED Viewed

@@ -110,57 +110,55 @@ async def chat_endpoint(req: ChatRequest):
     return {"response": response_text}
-# TTS endpoint using Supertonic 2
-tts_model = None
-tts_processor = None
 @app.on_event("startup")
 async def load_tts():
-    global tts_model, tts_processor
-    try:
-        print("Loading Supertonic 2 TTS...")
-        from transformers import AutoProcessor, AutoModel
-        tts_processor = AutoProcessor.from_pretrained("Supertone/supertonic-2")
-        tts_model = AutoModel.from_pretrained("Supertone/supertonic-2")
-        if device == "cuda":
-            tts_model = tts_model.to("cuda")
-        print("TTS Model loaded successfully!")
-    except Exception as e:
-        print(f"Could not load TTS model: {e}")
-        print("Voice will not be available.")
 class TTSRequest(BaseModel):
     text: str
 @app.post("/api/tts")
 async def text_to_speech(req: TTSRequest):
-    global tts_model, tts_processor
-    if tts_model is None or tts_processor is None:
-        raise HTTPException(status_code=503, detail="TTS model not loaded")
     try:
-        inputs = tts_processor(text=req.text, return_tensors="pt")
-        if device == "cuda":
-            inputs = {k: v.to("cuda") for k, v in inputs.items()}
-        with torch.no_grad():
-            audio = tts_model.generate(**inputs)
-        # Convert to numpy and create WAV
-        audio_np = audio.cpu().numpy().squeeze()
-        # Normalize audio
-        audio_np = np.int16(audio_np / np.max(np.abs(audio_np)) * 32767)
-        # Create WAV file in memory
-        wav_io = io.BytesIO()
-        wavfile.write(wav_io, 22050, audio_np)
-        wav_io.seek(0)
-        return Response(content=wav_io.read(), media_type="audio/wav")
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"TTS generation failed: {str(e)}")
 # Mount Static Frontend (Must be last)

     return {"response": response_text}
+# TTS endpoint using ElevenLabs (most natural voice)
 @app.on_event("startup")
 async def load_tts():
+    print("TTS: Using ElevenLabs for natural voice synthesis")
+    # ElevenLabs doesn't require model loading, uses API
 class TTSRequest(BaseModel):
     text: str
 @app.post("/api/tts")
 async def text_to_speech(req: TTSRequest):
     try:
+        # Use ElevenLabs free tier with their best voice
+        import requests
+        # Rachel voice (warm, natural female voice)
+        voice_id = "21m00Tcm4TlvDq8ikWAM"
+        url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
+        headers = {
+            "Content-Type": "application/json",
+        }
+        data = {
+            "text": req.text,
+            "model_id": "eleven_monolingual_v1",
+            "voice_settings": {
+                "stability": 0.5,
+                "similarity_boost": 0.75
+            }
+        }
+        # Try with API key from environment if available
+        api_key = os.getenv("ELEVENLABS_API_KEY")
+        if api_key:
+            headers["xi-api-key"] = api_key
+        response = requests.post(url, json=data, headers=headers)
+        if response.status_code == 200:
+            return Response(content=response.content, media_type="audio/mpeg")
+        else:
+            # Fallback to browser TTS if ElevenLabs fails
+            raise HTTPException(status_code=503, detail="TTS service unavailable, use browser fallback")
     except Exception as e:
+        print(f"TTS error: {e}")
         raise HTTPException(status_code=500, detail=f"TTS generation failed: {str(e)}")
 # Mount Static Frontend (Must be last)

frontend/app/voice/page.tsx CHANGED Viewed

@@ -66,37 +66,45 @@ export default function OraVoice() {
         setState("SPEAKING");
         try {
-            // Call backend TTS API for professional voice
             const res = await fetch("/api/tts", {
                 method: "POST",
                 headers: { "Content-Type": "application/json" },
                 body: JSON.stringify({ text }),
             });
-            if (!res.ok) {
-                throw new Error("TTS failed");
-            }
-            // Get audio blob and play it
-            const audioBlob = await res.blob();
-            const audioUrl = URL.createObjectURL(audioBlob);
-            const audio = new Audio(audioUrl);
-            audio.onended = () => {
-                setState("IDLE");
-                URL.revokeObjectURL(audioUrl);
-            };
-            audio.onerror = () => {
-                setState("IDLE");
-                URL.revokeObjectURL(audioUrl);
-            };
-            await audio.play();
         } catch (error) {
-            console.error("TTS error:", error);
-            setState("IDLE");
         }
     };

         setState("SPEAKING");
         try {
+            // Try backend TTS first (ElevenLabs - most natural)
             const res = await fetch("/api/tts", {
                 method: "POST",
                 headers: { "Content-Type": "application/json" },
                 body: JSON.stringify({ text }),
             });
+            if (res.ok) {
+                // Backend TTS succeeded
+                const audioBlob = await res.blob();
+                const audioUrl = URL.createObjectURL(audioBlob);
+                const audio = new Audio(audioUrl);
+                audio.onended = () => {
+                    setState("IDLE");
+                    URL.revokeObjectURL(audioUrl);
+                };
+                audio.onerror = () => {
+                    setState("IDLE");
+                    URL.revokeObjectURL(audioUrl);
+                };
+                await audio.play();
+            } else {
+                // Fallback to browser TTS
+                throw new Error("Backend TTS unavailable");
+            }
         } catch (error) {
+            console.log("Using browser TTS fallback");
+            // Browser TTS fallback
+            const utterance = new SpeechSynthesisUtterance(text);
+            utterance.rate = 0.9;
+            utterance.pitch = 0.95;
+            utterance.onend = () => setState("IDLE");
+            utterance.onerror = () => setState("IDLE");
+            window.speechSynthesis.speak(utterance);
         }
     };