Update app.py
Browse files
app.py
CHANGED
|
@@ -28,6 +28,11 @@ async def serve_homepage():
|
|
| 28 |
"""Serve the chat interface HTML."""
|
| 29 |
with open("static/index.html", "r") as f:
|
| 30 |
return Response(content=f.read(), media_type="text/html")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
@app.post("/stt/")
|
| 33 |
async def speech_to_text(file: UploadFile = File(...)):
|
|
@@ -132,6 +137,91 @@ async def chat_with_llm(file: UploadFile = File(...)):
|
|
| 132 |
}
|
| 133 |
)
|
| 134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
except Exception as e:
|
| 136 |
print(f"Error processing audio: {str(e)}")
|
| 137 |
return Response(
|
|
|
|
| 28 |
"""Serve the chat interface HTML."""
|
| 29 |
with open("static/index.html", "r") as f:
|
| 30 |
return Response(content=f.read(), media_type="text/html")
|
| 31 |
+
@app.get("/conv")
async def serve_homepage():
    """Serve the HTML page stored at ``static/conv.html`` for ``GET /conv``.

    Returns:
        Response: the file's text, sent with media type ``text/html``.

    Raises:
        OSError: if ``static/conv.html`` cannot be opened; not handled here.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    # NOTE(review): this handler shares the name ``serve_homepage`` with the
    # handler that serves static/index.html, so the later definition rebinds
    # the module-level name. The name is kept here so existing references do
    # not break; consider giving this route its own name.

    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, so the page reads the same on every host.
    # Assumes the HTML file is saved as UTF-8 -- TODO confirm.
    with open("static/conv.html", "r", encoding="utf-8") as f:
        return Response(content=f.read(), media_type="text/html")
|
| 36 |
|
| 37 |
@app.post("/stt/")
|
| 38 |
async def speech_to_text(file: UploadFile = File(...)):
|
|
|
|
| 137 |
}
|
| 138 |
)
|
| 139 |
|
| 140 |
+
except Exception as e:
|
| 141 |
+
print(f"Error processing audio: {str(e)}")
|
| 142 |
+
return Response(
|
| 143 |
+
content=f"Error processing audio: {str(e)}",
|
| 144 |
+
media_type="text/plain",
|
| 145 |
+
status_code=500
|
| 146 |
+
)
|
| 147 |
+
@app.post("/continuous-chat/")
|
| 148 |
+
async def continuous_chat(
|
| 149 |
+
file: UploadFile = File(...),
|
| 150 |
+
chat_history: Optional[str] = Form(None)
|
| 151 |
+
):
|
| 152 |
+
"""Process input WAV with chat history, send text to LLM, and return response as WAV."""
|
| 153 |
+
# Initialize ggwave instance
|
| 154 |
+
instance = ggwave.init()
|
| 155 |
+
|
| 156 |
+
# Parse chat history if provided
|
| 157 |
+
messages = [{"role": "system", "content": "you are a helpful assistant. answer always in one sentence"}]
|
| 158 |
+
|
| 159 |
+
if chat_history:
|
| 160 |
+
try:
|
| 161 |
+
history = json.loads(chat_history)
|
| 162 |
+
for msg in history:
|
| 163 |
+
if msg["role"] in ["user", "assistant"]:
|
| 164 |
+
messages.append(msg)
|
| 165 |
+
except Exception as e:
|
| 166 |
+
print(f"Error parsing chat history: {str(e)}")
|
| 167 |
+
|
| 168 |
+
# Read the file content into memory
|
| 169 |
+
file_content = await file.read()
|
| 170 |
+
|
| 171 |
+
# Process the audio file
|
| 172 |
+
with io.BytesIO(file_content) as buffer:
|
| 173 |
+
try:
|
| 174 |
+
fs, recorded_waveform = wav.read(buffer)
|
| 175 |
+
recorded_waveform = recorded_waveform.astype(np.float32) / 32767.0
|
| 176 |
+
waveform_bytes = recorded_waveform.tobytes()
|
| 177 |
+
user_message = ggwave.decode(instance, waveform_bytes)
|
| 178 |
+
|
| 179 |
+
if user_message is None:
|
| 180 |
+
return Response(
|
| 181 |
+
content="No message detected in audio",
|
| 182 |
+
media_type="text/plain",
|
| 183 |
+
status_code=400
|
| 184 |
+
)
|
| 185 |
+
|
| 186 |
+
decoded_message = user_message.decode("utf-8")
|
| 187 |
+
print("user_message: " + decoded_message)
|
| 188 |
+
|
| 189 |
+
# Add user message to messages
|
| 190 |
+
messages.append({"role": "user", "content": decoded_message})
|
| 191 |
+
|
| 192 |
+
# Send to LLM with full chat history
|
| 193 |
+
chat_completion = client.chat.completions.create(
|
| 194 |
+
messages=messages,
|
| 195 |
+
model="llama-3.3-70b-versatile",
|
| 196 |
+
)
|
| 197 |
+
|
| 198 |
+
llm_response = chat_completion.choices[0].message.content
|
| 199 |
+
print(llm_response)
|
| 200 |
+
|
| 201 |
+
# Convert response to audio
|
| 202 |
+
encoded_waveform = ggwave.encode(llm_response, protocolId=1, volume=100)
|
| 203 |
+
waveform_float32 = np.frombuffer(encoded_waveform, dtype=np.float32)
|
| 204 |
+
waveform_int16 = np.int16(waveform_float32 * 32767)
|
| 205 |
+
|
| 206 |
+
# Save to buffer
|
| 207 |
+
buffer = io.BytesIO()
|
| 208 |
+
with wave.open(buffer, "wb") as wf:
|
| 209 |
+
wf.setnchannels(1)
|
| 210 |
+
wf.setsampwidth(2)
|
| 211 |
+
wf.setframerate(48000)
|
| 212 |
+
wf.writeframes(waveform_int16.tobytes())
|
| 213 |
+
|
| 214 |
+
buffer.seek(0)
|
| 215 |
+
|
| 216 |
+
return Response(
|
| 217 |
+
content=buffer.getvalue(),
|
| 218 |
+
media_type="audio/wav",
|
| 219 |
+
headers={
|
| 220 |
+
"X-User-Message": decoded_message,
|
| 221 |
+
"X-LLM-Response": llm_response
|
| 222 |
+
}
|
| 223 |
+
)
|
| 224 |
+
|
| 225 |
except Exception as e:
|
| 226 |
print(f"Error processing audio: {str(e)}")
|
| 227 |
return Response(
|