Spaces:

DataEyond
/

Demo-Voice-Agent-Service

Sleeping

App Files Community

ishaq101 committed 29 days ago

Commit

aebb7d4

1 Parent(s): dd0dc33

[NOTICKET] feat: endpoint stt and tts

Browse files

Files changed (4) hide show

main.py +35 -3
pyproject.toml +1 -0
src/stt/deepgram_rest.py +32 -0
uv.lock +11 -0

main.py CHANGED Viewed

@@ -1,10 +1,13 @@
 import json
 import logging
 import uvicorn
-from fastapi import FastAPI, WebSocket, WebSocketDisconnect
-from fastapi.responses import JSONResponse
 from src.pipeline import EchoPipeline
-from src.config import DEEPGRAM_API_KEY, CARTESIA_API_KEY, CARTESIA_VOICE_ID
 logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")
 logger = logging.getLogger(__name__)
@@ -32,6 +35,35 @@ async def health() -> JSONResponse:
     return JSONResponse(status_code=200 if all_ready else 503, content=body)
 @app.websocket("/ws/voice")
 async def voice_ws(ws: WebSocket) -> None:
     await ws.accept()

 import json
 import logging
 import uvicorn
+from fastapi import FastAPI, WebSocket, WebSocketDisconnect, File, UploadFile, HTTPException
+from fastapi.responses import JSONResponse, StreamingResponse
+from pydantic import BaseModel
 from src.pipeline import EchoPipeline
+from src.config import DEEPGRAM_API_KEY, CARTESIA_API_KEY, CARTESIA_VOICE_ID, SAMPLE_RATE
+from src.stt.deepgram_rest import transcribe_audio
+from src.tts.cartesia_client import synthesize_stream
 logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")
 logger = logging.getLogger(__name__)
     return JSONResponse(status_code=200 if all_ready else 503, content=body)
+class TTSRequest(BaseModel):
+    text: str
+@app.post("/stt")
+async def speech_to_text(audio: UploadFile = File(...)) -> JSONResponse:
+    data = await audio.read()
+    if not data:
+        raise HTTPException(status_code=400, detail="Audio file is empty.")
+    mimetype = audio.content_type or "audio/wav"
+    result = await transcribe_audio(data, mimetype=mimetype)
+    return JSONResponse(content=result)
+@app.post("/tts")
+async def text_to_speech(req: TTSRequest) -> StreamingResponse:
+    if not req.text.strip():
+        raise HTTPException(status_code=400, detail="text must not be empty.")
+    return StreamingResponse(
+        synthesize_stream(req.text),
+        media_type="audio/pcm",
+        headers={
+            "X-Sample-Rate": str(SAMPLE_RATE),
+            "X-Encoding": "pcm_s16le",
+            "X-Channels": "1",
+        },
+    )
 @app.websocket("/ws/voice")
 async def voice_ws(ws: WebSocket) -> None:
     await ws.accept()

pyproject.toml CHANGED Viewed

@@ -11,6 +11,7 @@ dependencies = [
     "assemblyai==0.33.0",
     "cartesia==1.3.1",
     "deepgram-sdk>=6.1.1",
 ]
 [project.optional-dependencies]

     "assemblyai==0.33.0",
     "cartesia==1.3.1",
     "deepgram-sdk>=6.1.1",
+    "python-multipart>=0.0.26",
 ]
 [project.optional-dependencies]

src/stt/deepgram_rest.py ADDED Viewed

	@@ -0,0 +1,32 @@

+import logging
+from deepgram import AsyncDeepgramClient
+from src.config import DEEPGRAM_API_KEY, DEEPGRAM_LANGUAGE
+logger = logging.getLogger(__name__)
+async def transcribe_audio(data: bytes, mimetype: str = "audio/wav") -> dict:
+    """
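+    Transcribes a full audio file using the Deepgram pre-recorded API.
+    Returns a dict with 'text' (full transcript, "" if none is found), 'language'
+    (the configured DEEPGRAM_LANGUAGE) and 'duration' (seconds, or None if absent).
+    NOTE(review): `mimetype` is accepted but not forwarded to Deepgram in this function.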
+    """
+    client = AsyncDeepgramClient(api_key=DEEPGRAM_API_KEY)
+    response = await client.listen.v1.media.transcribe_file(
+        request=data,
+        model="nova-2",
+        language=DEEPGRAM_LANGUAGE,
+        smart_format=True,
+    )
+    try:
+        transcript = response.results.channels[0].alternatives[0].transcript
+    except (AttributeError, IndexError):
+        transcript = ""
+    try:
+        duration = response.metadata.duration
+    except AttributeError:
+        duration = None
+    logger.info("STT transcript (%ss): %s", duration, transcript[:80])
+    return {"text": transcript, "language": DEEPGRAM_LANGUAGE, "duration": duration}

uv.lock CHANGED Viewed

@@ -876,6 +876,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/6a/3e/b68c118422ec867fa7ab88444e1274aa40681c606d59ac27de5a5588f082/python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a", size = 19863, upload-time = "2024-01-23T06:32:58.246Z" },
 ]
 [[package]]
 name = "pyyaml"
 version = "6.0.3"
@@ -1070,6 +1079,7 @@ dependencies = [
     { name = "fastapi" },
     { name = "httpx" },
     { name = "python-dotenv" },
     { name = "uvicorn", extra = ["standard"] },
     { name = "websockets" },
 ]
@@ -1087,6 +1097,7 @@ requires-dist = [
     { name = "fastapi", specifier = "==0.115.0" },
     { name = "httpx", specifier = "==0.27.2" },
     { name = "python-dotenv", specifier = "==1.0.1" },
     { name = "uvicorn", extras = ["standard"], specifier = "==0.30.6" },
     { name = "websockets", specifier = "==13.1" },
     { name = "websockets", marker = "extra == 'dev'", specifier = "==13.1" },

     { url = "https://files.pythonhosted.org/packages/6a/3e/b68c118422ec867fa7ab88444e1274aa40681c606d59ac27de5a5588f082/python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a", size = 19863, upload-time = "2024-01-23T06:32:58.246Z" },
 ]
+[[package]]
+name = "python-multipart"
+version = "0.0.26"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/88/71/b145a380824a960ebd60e1014256dbb7d2253f2316ff2d73dfd8928ec2c3/python_multipart-0.0.26.tar.gz", hash = "sha256:08fadc45918cd615e26846437f50c5d6d23304da32c341f289a617127b081f17", size = 43501, upload-time = "2026-04-10T14:09:59.473Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/9a/22/f1925cdda983ab66fc8ec6ec8014b959262747e58bdca26a4e3d1da29d56/python_multipart-0.0.26-py3-none-any.whl", hash = "sha256:c0b169f8c4484c13b0dcf2ef0ec3a4adb255c4b7d18d8e420477d2b1dd03f185", size = 28847, upload-time = "2026-04-10T14:09:58.131Z" },
+]
 [[package]]
 name = "pyyaml"
 version = "6.0.3"
     { name = "fastapi" },
     { name = "httpx" },
     { name = "python-dotenv" },
+    { name = "python-multipart" },
     { name = "uvicorn", extra = ["standard"] },
     { name = "websockets" },
 ]
     { name = "fastapi", specifier = "==0.115.0" },
     { name = "httpx", specifier = "==0.27.2" },
     { name = "python-dotenv", specifier = "==1.0.1" },
+    { name = "python-multipart", specifier = ">=0.0.26" },
     { name = "uvicorn", extras = ["standard"], specifier = "==0.30.6" },
     { name = "websockets", specifier = "==13.1" },
     { name = "websockets", marker = "extra == 'dev'", specifier = "==13.1" },