CrazyMonkey0 committed on
Commit
245cf59
·
1 Parent(s): 2a3f624

feat(chat): return NLP response with in-memory TTS audio

Browse files

- The /chat endpoint now generates TTS audio in memory and returns it together with the response text.
- Uses multipart/form-data to send both text and audio in a single response.
- Removes disk I/O for audio, improving performance.

Files changed (1) hide show
  1. app/routes/nlp.py +21 -8
app/routes/nlp.py CHANGED
@@ -1,8 +1,9 @@
1
- from fastapi import APIRouter, Request
2
  from pydantic import BaseModel
3
  from llama_cpp import Llama
4
  from llama_cpp.llama_chat_format import Qwen25VLChatHandler
5
  from .tts import save_audio
 
6
 
7
  router = APIRouter()
8
 
@@ -39,6 +40,7 @@ def load_model_nlp():
39
  print("[INFO] NLP model loaded.")
40
  return llm
41
 
 
42
  @router.post("/chat")
43
  async def chat(request: Request, chat_request: ChatRequest):
44
  """Endpoint for chat with the NLP model."""
@@ -62,10 +64,21 @@ async def chat(request: Request, chat_request: ChatRequest):
62
  top_k=50,
63
  )
64
 
65
- # Extract response text
66
- response_text = output['choices'][0]['message']['content'].strip()
67
- #audio = save_audio(request, response_text)
68
- return {
69
- "response": response_text,
70
- "audio": 'audio'
71
- }
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Request, Response
2
  from pydantic import BaseModel
3
  from llama_cpp import Llama
4
  from llama_cpp.llama_chat_format import Qwen25VLChatHandler
5
  from .tts import save_audio
6
+ import uuid
7
 
8
  router = APIRouter()
9
 
 
40
  print("[INFO] NLP model loaded.")
41
  return llm
42
 
43
+
44
  @router.post("/chat")
45
  async def chat(request: Request, chat_request: ChatRequest):
46
  """Endpoint for chat with the NLP model."""
 
64
  top_k=50,
65
  )
66
 
67
+ response_text = output["choices"][0]["message"]["content"].strip()
68
+ audio_bytes = save_audio(request, response_text)
69
+
70
+ boundary = uuid.uuid4().hex
71
+
72
+ body = (
73
+ f"--{boundary}\r\n"
74
+ f"Content-Disposition: form-data; name=\"text\"\r\n\r\n"
75
+ f"{response_text}\r\n"
76
+ f"--{boundary}\r\n"
77
+ f"Content-Disposition: form-data; name=\"audio\"; filename=\"speech.wav\"\r\n"
78
+ f"Content-Type: audio/wav\r\n\r\n"
79
+ ).encode() + audio_bytes + f"\r\n--{boundary}--\r\n".encode()
80
+
81
+ return Response(
82
+ content=body,
83
+ media_type=f"multipart/form-data; boundary={boundary}"
84
+ )