Update api.py
Browse files
api.py
CHANGED
|
@@ -232,7 +232,7 @@ def voice_chat():
|
|
| 232 |
audio.save(tmp.name)
|
| 233 |
audio_path = tmp.name
|
| 234 |
|
| 235 |
-
# Step 1️⃣: Transcribe
|
| 236 |
transcription = client.audio.transcriptions.create(
|
| 237 |
model="whisper-1",
|
| 238 |
file=open(audio_path, "rb"),
|
|
@@ -240,37 +240,65 @@ def voice_chat():
|
|
| 240 |
text = transcription.text.strip()
|
| 241 |
print(f"🎤 Transcribed: {text}")
|
| 242 |
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
return jsonify(
|
| 270 |
{
|
| 271 |
"transcript": text,
|
| 272 |
"answer": answer_text,
|
| 273 |
"audio_url": f"https://mahmous-chatbot3.hf.space/audio/{os.path.basename(speech_file.name)}",
|
|
|
|
| 274 |
}
|
| 275 |
)
|
| 276 |
|
|
@@ -279,6 +307,7 @@ def voice_chat():
|
|
| 279 |
return jsonify({"error": str(e)}), 500
|
| 280 |
|
| 281 |
|
|
|
|
| 282 |
@app.route("/audio/<filename>")
|
| 283 |
def serve_audio(filename):
|
| 284 |
return send_file(
|
|
|
|
| 232 |
audio.save(tmp.name)
|
| 233 |
audio_path = tmp.name
|
| 234 |
|
| 235 |
+
# Step 1️⃣: Transcribe user speech to text using Whisper
|
| 236 |
transcription = client.audio.transcriptions.create(
|
| 237 |
model="whisper-1",
|
| 238 |
file=open(audio_path, "rb"),
|
|
|
|
| 240 |
text = transcription.text.strip()
|
| 241 |
print(f"🎤 Transcribed: {text}")
|
| 242 |
|
| 243 |
+
if not text:
|
| 244 |
+
return jsonify({"error": "Transcription failed or empty"}), 400
|
| 245 |
+
|
| 246 |
+
# Step 2️⃣: Retrieve Pinecone context (book knowledge)
|
| 247 |
+
context, results = "", []
|
| 248 |
+
try:
|
| 249 |
+
raw_results = retriever.retrieve(text)
|
| 250 |
+
MIN_SCORE = 0.02 # slightly lower for better recall
|
| 251 |
+
results = [r for r in raw_results if r.get("score", 0) >= MIN_SCORE]
|
| 252 |
+
if results:
|
| 253 |
+
context = "\n\n---\n\n".join(
|
| 254 |
+
[f"(Seite {r['page']}) {r['context']}" for r in results]
|
| 255 |
+
)
|
| 256 |
+
except Exception as e:
|
| 257 |
+
print("⚠️ Retriever error:", e)
|
| 258 |
+
|
| 259 |
+
# Step 3️⃣: Choose prompt
|
| 260 |
+
if context:
|
| 261 |
+
sys_prompt = system_prompt_book_only()
|
| 262 |
+
user_prompt = f"Question: {text}\n\nBook context:\n{context}"
|
| 263 |
+
else:
|
| 264 |
+
sys_prompt = system_prompt_fallback()
|
| 265 |
+
user_prompt = text
|
| 266 |
+
|
| 267 |
+
# Step 4️⃣: Generate answer with GPT (book context when retrieved, otherwise the fallback prompt)
|
| 268 |
+
try:
|
| 269 |
+
response = client.chat.completions.create(
|
| 270 |
+
model="gpt-4o",
|
| 271 |
+
messages=[
|
| 272 |
+
{"role": "system", "content": sys_prompt},
|
| 273 |
+
{"role": "user", "content": user_prompt},
|
| 274 |
+
],
|
| 275 |
+
max_tokens=700,
|
| 276 |
+
)
|
| 277 |
+
answer_text = response.choices[0].message.content.strip()
|
| 278 |
+
except Exception as e:
|
| 279 |
+
traceback.print_exc()
|
| 280 |
+
return jsonify({"error": f"GPT generation failed: {e}"}), 500
|
| 281 |
+
|
| 282 |
+
# Step 5️⃣: Generate voice reply with GPT TTS
|
| 283 |
+
try:
|
| 284 |
+
speech_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
|
| 285 |
+
with client.audio.speech.with_streaming_response.create(
|
| 286 |
+
model="gpt-4o-tts",
|
| 287 |
+
voice="alloy",
|
| 288 |
+
input=answer_text,
|
| 289 |
+
) as speech:
|
| 290 |
+
speech.stream_to_file(speech_file.name)
|
| 291 |
+
except Exception as e:
|
| 292 |
+
traceback.print_exc()
|
| 293 |
+
return jsonify({"error": f"TTS failed: {e}"}), 500
|
| 294 |
+
|
| 295 |
+
# Step 6️⃣: Return transcript + answer + audio URL + source pages
|
| 296 |
return jsonify(
|
| 297 |
{
|
| 298 |
"transcript": text,
|
| 299 |
"answer": answer_text,
|
| 300 |
"audio_url": f"https://mahmous-chatbot3.hf.space/audio/{os.path.basename(speech_file.name)}",
|
| 301 |
+
"source": [r.get("page") for r in results if r.get("page")],
|
| 302 |
}
|
| 303 |
)
|
| 304 |
|
|
|
|
| 307 |
return jsonify({"error": str(e)}), 500
|
| 308 |
|
| 309 |
|
| 310 |
+
|
| 311 |
@app.route("/audio/<filename>")
|
| 312 |
def serve_audio(filename):
|
| 313 |
return send_file(
|