Arnavkumar01 committed on
Commit
1a15b05
·
1 Parent(s): e141e7c

I wish I could `rm -rf` my way through this, but, well, changes were made to the ElevenLabs voice-conversion code.

Browse files
Files changed (1) hide show
  1. main.py +140 -131
main.py CHANGED
@@ -3,8 +3,6 @@ import io
3
  import json
4
  import re
5
  import tempfile
6
- import asyncio
7
- from typing import Optional
8
  import logging
9
  from contextlib import asynccontextmanager
10
  from fastapi import FastAPI, Request, status, Depends, Header, HTTPException
@@ -20,10 +18,15 @@ from sqlalchemy import create_engine
20
  # --- GRADIO ---
21
  import gradio as gr
22
 
23
- # --- SETUP ---
24
- os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
25
- logging.getLogger('tensorflow').setLevel(logging.ERROR)
26
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
 
 
 
 
27
 
28
  load_dotenv()
29
  NEON_DATABASE_URL = os.getenv("NEON_DATABASE_URL")
@@ -31,12 +34,15 @@ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
31
  ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
32
  SHARED_SECRET = os.getenv("SHARED_SECRET")
33
 
34
- # --- CONFIG ---
35
  COLLECTION_NAME = "real_estate_embeddings"
36
  EMBEDDING_MODEL = "hkunlp/instructor-large"
37
- ELEVENLABS_VOICE_NAME = "Leo"
 
 
 
38
  PLANNER_MODEL = "gpt-4o-mini"
39
  ANSWERER_MODEL = "gpt-4o"
 
40
  TABLE_DESCRIPTIONS = """
41
  - "ongoing_projects_source": Details about projects currently under construction.
42
  - "upcoming_projects_source": Information on future planned projects.
@@ -46,37 +52,48 @@ TABLE_DESCRIPTIONS = """
46
  - "feedback_source": Customer feedback and ratings for projects.
47
  """
48
 
49
- # --- CLIENTS ---
 
 
50
  embeddings = None
51
  vector_store = None
52
  client_openai = OpenAI(api_key=OPENAI_API_KEY)
53
- client_elevenlabs = None # Initialize as None first
54
 
55
- # --- ADDED: DETAILED ELEVENLABS INITIALIZATION LOGGING ---
56
  try:
57
- # Log the key (partially) to verify it's being read
58
- key_preview = ELEVENLABS_API_KEY[:5] + "..." + ELEVENLABS_API_KEY[-4:] if ELEVENLABS_API_KEY and len(ELEVENLABS_API_KEY) > 9 else "None or too short"
59
- logging.info(f"Attempting to initialize ElevenLabs client with key: {key_preview}")
 
 
 
60
 
61
- # Ensure key is not None or empty before initializing
62
  if not ELEVENLABS_API_KEY:
63
- raise ValueError("ELEVENLABS_API_KEY environment variable not set or empty.")
64
 
65
  client_elevenlabs = ElevenLabs(api_key=ELEVENLABS_API_KEY)
66
- logging.info(f"Initialized ElevenLabs client object. Type: {type(client_elevenlabs)}")
67
 
68
- # Try accessing a simple attribute or method to confirm initialization
69
- # Note: This might make a network call during startup
70
  voices = client_elevenlabs.voices.get_all()
71
- logging.info(f"Successfully fetched {len(voices.voices)} voices from ElevenLabs.")
72
 
73
  except Exception as e:
74
- logging.error(f"Failed to initialize ElevenLabs client or fetch voices: {e}", exc_info=True)
75
- client_elevenlabs = None # Ensure it's None if init failed
76
- # --- END ADDED LOGGING ---
 
 
 
77
 
 
 
 
78
 
79
- # --- LIFESPAN ---
 
 
80
  @asynccontextmanager
81
  async def lifespan(app: FastAPI):
82
  global embeddings, vector_store
@@ -94,18 +111,12 @@ async def lifespan(app: FastAPI):
94
  yield
95
  logging.info("Shutting down.")
96
 
97
- # --- ADDED: LIBRARY VERSION LOGGING ---
98
- try:
99
- import elevenlabs
100
- logging.info(f"Found elevenlabs library version: {elevenlabs.__version__}")
101
- except ImportError:
102
- logging.error("Could not import elevenlabs library!")
103
- # --- END ADDED LOGGING ---
104
 
105
  app = FastAPI(lifespan=lifespan)
106
 
107
-
108
- # --- PROMPTS ---
 
109
  QUERY_FORMULATION_PROMPT = """
110
  You are a query analysis agent. Transform the user's query into a precise search query and determine the correct table to filter by.
111
  **Available Tables:**
@@ -116,7 +127,7 @@ You are a query analysis agent. Transform the user's query into a precise search
116
  2. If status keywords (ongoing, completed, upcoming, etc.) are present, pick the matching table.
117
  3. If no status keyword, set filter_table to null.
118
  4. Return JSON: {{"search_query": "...", "filter_table": "table_name or null"}}
119
- """
120
 
121
  ANSWER_SYSTEM_PROMPT = """
122
  You are an expert AI assistant for a premier real estate developer.
@@ -128,158 +139,154 @@ You are an expert AI assistant for a premier real estate developer.
128
  1. Match user language (Hinglish → Hinglish, English → English).
129
  2. Use CONTEXT if available, else use core knowledge.
130
  3. Only answer real estate questions.
131
- """
132
-
133
 
134
- # --- AUDIO & LLM HELPERS ---
 
 
135
  def transcribe_audio(audio_path: str, audio_bytes: bytes) -> str:
136
  for attempt in range(3):
137
  try:
138
  audio_file = io.BytesIO(audio_bytes)
139
- filename = os.path.basename(audio_path) # e.g., "audio.wav"
140
-
141
- logging.info(f"Transcribing audio: {filename} ({len(audio_bytes)} bytes)")
142
 
 
143
  transcript = client_openai.audio.transcriptions.create(
144
  model="whisper-1",
145
- file=(filename, audio_file) # ← Critical: gives format hint
146
  )
147
  text = transcript.text.strip()
148
 
149
  # Hinglish transliteration
150
- if re.search(r'[\u0900-\u097F]', text):
151
- response = client_openai.chat.completions.create(
152
  model="gpt-4o-mini",
153
- messages=[{"role": "user", "content": f"Transliterate to Roman (Hinglish): {text}"}],
154
- temperature=0.0
 
 
155
  )
156
- text = response.choices[0].message.content.strip()
157
 
158
  logging.info(f"Transcribed: {text}")
159
  return text
160
-
161
  except Exception as e:
162
- logging.error(f"Transcription error (attempt {attempt+1}): {e}", exc_info=True) # Added exc_info
163
  if attempt == 2:
164
  return ""
165
  return ""
166
 
167
- # --- UPDATED generate_elevenlabs_sync with check ---
168
- def generate_elevenlabs_sync(text: str, voice: str) -> bytes:
169
- # --- ADDED THIS CHECK ---
 
 
 
170
  if client_elevenlabs is None:
171
- logging.error("ElevenLabs client is not initialized. Cannot generate audio.")
172
- return b''
173
- # --- END ADDED CHECK ---
174
 
175
  for attempt in range(3):
176
  try:
177
- # This call might still fail if init succeeded but key is bad at runtime
178
- logging.info(f"Calling ElevenLabs generate for voice '{voice}'...")
179
- audio_data = client_elevenlabs.generate(
180
  text=text,
181
- voice=voice,
182
  model="eleven_multilingual_v2",
183
- output_format="mp3_44100_128"
184
  )
185
- # Check if generate returns bytes directly or needs iteration (depends on exact version/method)
186
- if isinstance(audio_data, bytes):
187
- logging.info(f"ElevenLabs generate returned {len(audio_data)} bytes.")
188
- return audio_data
189
- else:
190
- # Handle streaming iterator if necessary
191
- chunks = b""
192
- for chunk in audio_data:
193
- chunks += chunk
194
- logging.info(f"ElevenLabs generate streamed {len(chunks)} bytes.")
195
- return chunks
196
-
197
  except Exception as e:
198
- logging.error(f"ElevenLabs error during generate (attempt {attempt+1}): {e}", exc_info=True) # Added exc_info
 
 
199
  if attempt == 2:
200
- return b''
201
- return b''
202
- # --- END UPDATED FUNCTION ---
203
 
204
  async def formulate_search_plan(user_query: str) -> dict:
205
- logging.info(f"Formulating search plan for query: {user_query}")
206
  for attempt in range(3):
207
  try:
208
- # Format the prompt here with BOTH variables
209
- formatted_prompt = QUERY_FORMULATION_PROMPT.format(
210
- table_descriptions=TABLE_DESCRIPTIONS,
211
- user_query=user_query
212
  )
213
-
214
- response = await run_in_threadpool(
215
  client_openai.chat.completions.create,
216
  model=PLANNER_MODEL,
217
- messages=[{"role": "user", "content": formatted_prompt}], # Use the fully formatted prompt
218
  response_format={"type": "json_object"},
219
- temperature=0.0
220
  )
221
- # Log the raw response BEFORE trying to parse
222
- raw_response_content = response.choices[0].message.content
223
- logging.info(f"Raw Planner LLM response content: {raw_response_content}")
224
-
225
- # Try parsing
226
- plan = json.loads(raw_response_content)
227
- logging.info(f"Successfully parsed search plan: {plan}")
228
  return plan
229
  except Exception as e:
230
- # Log the specific error during parsing or API call, with traceback
231
- logging.error(f"Planner error (attempt {attempt+1}): {e}", exc_info=True)
232
  if attempt == 2:
233
- logging.warning("Planner failed after 3 attempts. Using fallback.")
234
  return {"search_query": user_query, "filter_table": None}
235
- # Fallback if loop finishes unexpectedly
236
- logging.error("Planner loop finished unexpectedly. Using fallback.")
237
  return {"search_query": user_query, "filter_table": None}
238
 
 
239
  async def get_agent_response(user_text: str) -> str:
240
  for attempt in range(3):
241
  try:
242
  plan = await formulate_search_plan(user_text)
243
- search_query = plan.get("search_query", user_text)
244
- filter_table = plan.get("filter_table")
245
- search_filter = {"source_table": filter_table} if filter_table else {}
246
 
247
  docs = await run_in_threadpool(
248
  vector_store.similarity_search,
249
- search_query, k=3, filter=search_filter
 
 
250
  )
251
  if not docs:
252
- docs = await run_in_threadpool(vector_store.similarity_search, search_query, k=3)
253
 
254
- context = "\n\n".join([d.page_content for d in docs])
255
 
256
- response = await run_in_threadpool(
257
  client_openai.chat.completions.create,
258
  model=ANSWERER_MODEL,
259
  messages=[
260
  {"role": "system", "content": ANSWER_SYSTEM_PROMPT},
261
  {"role": "system", "content": f"CONTEXT:\n{context}"},
262
- {"role": "user", "content": f"Question: {user_text}"}
263
- ]
264
  )
265
- return response.choices[0].message.content.strip()
266
  except Exception as e:
267
- logging.error(f"RAG error (attempt {attempt+1}): {e}", exc_info=True) # Added exc_info
268
  if attempt == 2:
269
  return "Sorry, I couldn't respond. Please try again."
270
  return "Sorry, I couldn't respond."
271
 
272
 
273
- # --- AUTH ENDPOINT ---
 
 
274
  class TextQuery(BaseModel):
275
  query: str
276
 
 
277
  async def verify_token(x_auth_token: str = Header(...)):
278
  if not SHARED_SECRET or x_auth_token != SHARED_SECRET:
279
  logging.warning("Auth failed for /test-text-query")
280
  raise HTTPException(status_code=401, detail="Invalid token")
281
  logging.info("Auth passed")
282
 
 
283
  @app.post("/test-text-query", dependencies=[Depends(verify_token)])
284
  async def test_text_query_endpoint(query: TextQuery):
285
  logging.info(f"Text query: {query.query}")
@@ -287,58 +294,59 @@ async def test_text_query_endpoint(query: TextQuery):
287
  return {"response": response}
288
 
289
 
290
- # --- GRADIO AUDIO PROCESSING ---
 
 
291
  async def process_audio(audio_path):
292
  if not audio_path or not os.path.exists(audio_path):
293
  return None, "No valid audio file received."
294
 
295
  try:
296
- # Read raw bytes
297
  with open(audio_path, "rb") as f:
298
  audio_bytes = f.read()
299
-
300
- if len(audio_bytes) == 0:
301
  return None, "Empty audio file."
302
 
303
- # 1. Transcribe — pass path + bytes
304
  user_text = await run_in_threadpool(transcribe_audio, audio_path, audio_bytes)
305
  if not user_text:
306
  return None, "Couldn't understand audio. Try again."
307
 
308
  logging.info(f"User: {user_text}")
309
 
310
- # 2. AI Response
311
  agent_response = await get_agent_response(user_text)
312
  if not agent_response:
313
  return None, "No response generated."
314
 
315
  logging.info(f"AI: {agent_response[:100]}...")
316
 
317
- # 3. Generate Speech
318
- ai_audio_bytes = await run_in_threadpool(
319
- generate_elevenlabs_sync, agent_response, ELEVENLABS_VOICE_NAME
320
- )
321
  if not ai_audio_bytes:
322
- # Return the text response even if TTS fails
323
- logging.error("Failed to generate voice. Returning text only.")
324
- return None, f"**You:** {user_text}\n\n**AI:** {agent_response}\n\n_(Audio generation failed)_"
325
-
 
326
 
327
- # Save to temp file
328
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
329
  f.write(ai_audio_bytes)
330
  out_path = f.name
331
- logging.info(f"Saved generated audio to temp file: {out_path}")
332
-
333
 
334
  return out_path, f"**You:** {user_text}\n\n**AI:** {agent_response}"
335
 
336
  except Exception as e:
337
- logging.error(f"Audio processing error: {e}", exc_info=True) # Added exc_info
338
  return None, f"Error: {str(e)}"
339
 
340
 
341
- # --- GRADIO UI ---
 
 
342
  with gr.Blocks(title="Real Estate AI") as demo:
343
  gr.Markdown("# Real Estate Voice Assistant")
344
  gr.Markdown("Ask about projects in Pune, Mumbai, Bengaluru, etc.")
@@ -349,11 +357,12 @@ with gr.Blocks(title="Real Estate AI") as demo:
349
 
350
  out_text = gr.Textbox(label="Conversation", lines=8)
351
 
352
- inp.change(process_audio, inp, [out_audio, out_text])
353
 
354
- # Removed examples to avoid FileNotFoundError with text inputs
355
- # gr.Examples(examples=[], inputs=inp)
356
 
357
 
358
- # --- MOUNT GRADIO ---
 
 
359
  app = gr.mount_gradio_app(app, demo, path="/")
 
3
  import json
4
  import re
5
  import tempfile
 
 
6
  import logging
7
  from contextlib import asynccontextmanager
8
  from fastapi import FastAPI, Request, status, Depends, Header, HTTPException
 
18
  # --- GRADIO ---
19
  import gradio as gr
20
 
21
+ # --------------------------------------------------------------------------- #
22
+ # CONFIGURATION
23
+ # --------------------------------------------------------------------------- #
24
+ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
25
+ logging.getLogger("tensorflow").setLevel(logging.ERROR)
26
+ logging.basicConfig(
27
+ level=logging.INFO,
28
+ format="%(asctime)s - %(levelname)s - %(message)s",
29
+ )
30
 
31
  load_dotenv()
32
  NEON_DATABASE_URL = os.getenv("NEON_DATABASE_URL")
 
34
  ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
35
  SHARED_SECRET = os.getenv("SHARED_SECRET")
36
 
 
37
  COLLECTION_NAME = "real_estate_embeddings"
38
  EMBEDDING_MODEL = "hkunlp/instructor-large"
39
+
40
+ # *** HARD-CODED VOICE ID (as requested) ***
41
+ ELEVENLABS_VOICE_ID = "IvLWq57RKibBrqZGpQrC" # <-- your voice
42
+
43
  PLANNER_MODEL = "gpt-4o-mini"
44
  ANSWERER_MODEL = "gpt-4o"
45
+
46
  TABLE_DESCRIPTIONS = """
47
  - "ongoing_projects_source": Details about projects currently under construction.
48
  - "upcoming_projects_source": Information on future planned projects.
 
52
  - "feedback_source": Customer feedback and ratings for projects.
53
  """
54
 
55
+ # --------------------------------------------------------------------------- #
56
+ # CLIENTS
57
+ # --------------------------------------------------------------------------- #
58
  embeddings = None
59
  vector_store = None
60
  client_openai = OpenAI(api_key=OPENAI_API_KEY)
# Module-level ElevenLabs client. Stays None when initialization fails so that
# callers can detect the condition and degrade to text-only responses.
client_elevenlabs = None

# ---- ElevenLabs init --------------------------------------------------------
try:
    if not ELEVENLABS_API_KEY:
        raise ValueError("ELEVENLABS_API_KEY is missing or empty.")

    # Never write key material to the logs, not even a prefix/suffix preview:
    # log files are routinely shared and retained. The length is enough to
    # confirm that the variable was actually read from the environment.
    logging.info(
        "Initializing ElevenLabs client (API key length: %d)",
        len(ELEVENLABS_API_KEY),
    )

    client_elevenlabs = ElevenLabs(api_key=ELEVENLABS_API_KEY)
    logging.info(f"ElevenLabs client created – type: {type(client_elevenlabs)}")

    # Listing voices proves the key works. NOTE: a failure here is treated as
    # an init failure and disables TTS (client is reset to None below).
    voices = client_elevenlabs.voices.get_all()
    logging.info(f"Fetched {len(voices.voices)} voices from ElevenLabs.")

except Exception as e:
    logging.error(f"ElevenLabs init failed: {e}", exc_info=True)
    client_elevenlabs = None

# ---- Log SDK version --------------------------------------------------------
# Best-effort diagnostics only: nothing here may prevent the app from starting.
try:
    import elevenlabs
except Exception:
    logging.error("Could not import elevenlabs package.")
else:
    # A missing __version__ attribute is not an import failure, so it is
    # reported as "unknown" instead of the misleading import error above.
    logging.info(
        "elevenlabs SDK version: %s",
        getattr(elevenlabs, "__version__", "unknown"),
    )
93
 
94
+ # --------------------------------------------------------------------------- #
95
+ # FASTAPI APP
96
+ # --------------------------------------------------------------------------- #
97
  @asynccontextmanager
98
  async def lifespan(app: FastAPI):
99
  global embeddings, vector_store
 
111
  yield
112
  logging.info("Shutting down.")
113
 
 
 
 
 
 
 
 
114
 
115
  app = FastAPI(lifespan=lifespan)
116
 
117
+ # --------------------------------------------------------------------------- #
118
+ # PROMPTS
119
+ # --------------------------------------------------------------------------- #
120
  QUERY_FORMULATION_PROMPT = """
121
  You are a query analysis agent. Transform the user's query into a precise search query and determine the correct table to filter by.
122
  **Available Tables:**
 
127
  2. If status keywords (ongoing, completed, upcoming, etc.) are present, pick the matching table.
128
  3. If no status keyword, set filter_table to null.
129
  4. Return JSON: {{"search_query": "...", "filter_table": "table_name or null"}}
130
+ """.strip()
131
 
132
  ANSWER_SYSTEM_PROMPT = """
133
  You are an expert AI assistant for a premier real estate developer.
 
139
  1. Match user language (Hinglish → Hinglish, English → English).
140
  2. Use CONTEXT if available, else use core knowledge.
141
  3. Only answer real estate questions.
142
+ """.strip()
 
143
 
144
+ # --------------------------------------------------------------------------- #
145
+ # AUDIO & LLM HELPERS
146
+ # --------------------------------------------------------------------------- #
def transcribe_audio(audio_path: str, audio_bytes: bytes) -> str:
    """Transcribe recorded audio to text, romanizing any Devanagari output.

    Args:
        audio_path: Path of the recording; only its basename is used, as the
            filename sent along with the upload.
        audio_bytes: Raw content of the recording.

    Returns:
        The stripped transcript (transliterated to Roman script when it
        contains Devanagari characters), or "" once three attempts have failed.
    """
    max_attempts = 3
    for attempt_no in range(1, max_attempts + 1):
        try:
            payload = io.BytesIO(audio_bytes)
            upload_name = os.path.basename(audio_path)

            logging.info(f"Transcribing {upload_name} ({len(audio_bytes)} bytes)")
            # The (filename, file) tuple gives the API a format hint.
            whisper_result = client_openai.audio.transcriptions.create(
                model="whisper-1",
                file=(upload_name, payload),
            )
            spoken = whisper_result.text.strip()

            # Hinglish transliteration: only needed when Devanagari is present.
            has_devanagari = re.search(r"[\u0900-\u097F]", spoken)
            if has_devanagari:
                prompt_message = {
                    "role": "user",
                    "content": f"Transliterate to Roman (Hinglish): {spoken}",
                }
                completion = client_openai.chat.completions.create(
                    model="gpt-4o-mini",
                    messages=[prompt_message],
                    temperature=0.0,
                )
                spoken = completion.choices[0].message.content.strip()

            logging.info(f"Transcribed: {spoken}")
            return spoken
        except Exception as err:
            logging.error(
                f"Transcription error (attempt {attempt_no}): {err}", exc_info=True
            )
    # Every attempt raised: report failure as an empty transcript.
    return ""
178
 
179
+
def generate_elevenlabs_sync(text: str) -> bytes:
    """Synthesize *text* to MP3 audio with ElevenLabs (blocking call).

    Uses the hard-coded ``ELEVENLABS_VOICE_ID`` and the SDK method
    ``client.text_to_speech.convert``. The call is attempted up to three
    times; every failure is logged with its traceback.

    Args:
        text: The text to speak.

    Returns:
        The complete MP3 payload, or ``b""`` when the client is not
        initialized or all attempts failed.
    """
    if client_elevenlabs is None:
        logging.error("ElevenLabs client not initialized skipping TTS.")
        return b""

    for attempt in range(3):
        try:
            logging.info("Calling ElevenLabs text_to_speech.convert...")
            stream = client_elevenlabs.text_to_speech.convert(
                voice_id=ELEVENLABS_VOICE_ID,
                text=text,
                # `convert` takes `model_id`; passing `model=` (the keyword of
                # the legacy `generate` helper) raises TypeError on every call.
                model_id="eleven_multilingual_v2",
                output_format="mp3_44100_128",
            )
            # The SDK yields the audio in chunks; join once instead of
            # repeatedly re-allocating with `+=`. Iteration stays inside the
            # try block because network errors surface while streaming.
            audio_bytes = b"".join(chunk for chunk in stream if chunk)
            logging.info(f"TTS returned {len(audio_bytes)} bytes.")
            return audio_bytes
        except Exception as e:
            logging.error(
                f"ElevenLabs TTS error (attempt {attempt + 1}): {e}", exc_info=True
            )
    # All attempts failed.
    return b""
212
+
213
 
async def formulate_search_plan(user_query: str) -> dict:
    """Ask the planner model for a search query and an optional table filter.

    Args:
        user_query: The user's question as received.

    Returns:
        The JSON object parsed from the planner's reply. After three failed
        attempts, the fallback plan
        ``{"search_query": user_query, "filter_table": None}``.
    """
    logging.info(f"Formulating search plan for: {user_query}")

    attempts_made = 0
    while attempts_made < 3:
        attempts_made += 1
        try:
            planner_prompt = QUERY_FORMULATION_PROMPT.format(
                table_descriptions=TABLE_DESCRIPTIONS,
                user_query=user_query,
            )
            # The OpenAI client is synchronous, so run it off the event loop.
            completion = await run_in_threadpool(
                client_openai.chat.completions.create,
                model=PLANNER_MODEL,
                messages=[{"role": "user", "content": planner_prompt}],
                response_format={"type": "json_object"},
                temperature=0.0,
            )
            reply_text = completion.choices[0].message.content
            logging.info(f"Planner raw response: {reply_text}")

            parsed_plan = json.loads(reply_text)
            logging.info(f"Parsed plan: {parsed_plan}")
            return parsed_plan
        except Exception as err:
            logging.error(
                f"Planner error (attempt {attempts_made}): {err}", exc_info=True
            )

    # Planner unavailable or unparsable: search with the raw query, unfiltered.
    return {"search_query": user_query, "filter_table": None}
238
 
239
+
async def get_agent_response(user_text: str) -> str:
    """Answer *user_text* with retrieval-augmented generation.

    Steps: obtain a search plan, run a (possibly table-filtered) similarity
    search, fall back to an unfiltered search when the filter yields nothing,
    then ask the answerer model with the retrieved context.

    Args:
        user_text: The user's question.

    Returns:
        The model's stripped answer, or an apology message once three
        attempts have failed.
    """
    plan = None
    for attempt in range(3):
        try:
            # Plan only once: formulate_search_plan already retries internally,
            # so re-planning on every RAG retry just multiplies LLM calls.
            if plan is None:
                plan = await formulate_search_plan(user_text)
                if not isinstance(plan, dict):
                    # Unexpected JSON shape: behave as if no plan was given.
                    plan = {}

            # `or` also covers a present-but-null/empty "search_query".
            search_q = plan.get("search_query") or user_text
            filter_tbl = plan.get("filter_table")
            search_filter = {"source_table": filter_tbl} if filter_tbl else {}

            docs = await run_in_threadpool(
                vector_store.similarity_search,
                search_q,
                k=3,
                filter=search_filter,
            )
            if not docs:
                # Nothing matched the filter; retry without it.
                docs = await run_in_threadpool(
                    vector_store.similarity_search, search_q, k=3
                )

            context = "\n\n".join(d.page_content for d in docs)

            resp = await run_in_threadpool(
                client_openai.chat.completions.create,
                model=ANSWERER_MODEL,
                messages=[
                    {"role": "system", "content": ANSWER_SYSTEM_PROMPT},
                    {"role": "system", "content": f"CONTEXT:\n{context}"},
                    {"role": "user", "content": f"Question: {user_text}"},
                ],
            )
            return resp.choices[0].message.content.strip()
        except Exception as e:
            logging.error(f"RAG error (attempt {attempt + 1}): {e}", exc_info=True)

    # All attempts failed.
    return "Sorry, I couldn't respond. Please try again."
274
 
275
 
276
+ # --------------------------------------------------------------------------- #
277
+ # AUTH ENDPOINT
278
+ # --------------------------------------------------------------------------- #
# Request body accepted by the /test-text-query endpoint.
class TextQuery(BaseModel):
    # The user's question as plain text.
    query: str
281
 
282
+
async def verify_token(x_auth_token: str = Header(...)):
    """FastAPI dependency that checks the auth-token header against SHARED_SECRET.

    Args:
        x_auth_token: Token supplied by the client in a request header.

    Raises:
        HTTPException: 401 when SHARED_SECRET is not configured or the token
            does not match.
    """
    # Local import keeps this block self-contained.
    import hmac

    # Compare in constant time so response timing does not reveal how many
    # leading characters of the secret a guess got right. Encoding to bytes
    # lets compare_digest accept non-ASCII input instead of raising TypeError.
    token_ok = bool(SHARED_SECRET) and hmac.compare_digest(
        x_auth_token.encode("utf-8"), SHARED_SECRET.encode("utf-8")
    )
    if not token_ok:
        logging.warning("Auth failed for /test-text-query")
        raise HTTPException(status_code=401, detail="Invalid token")
    logging.info("Auth passed")
288
 
289
+
290
  @app.post("/test-text-query", dependencies=[Depends(verify_token)])
291
  async def test_text_query_endpoint(query: TextQuery):
292
  logging.info(f"Text query: {query.query}")
 
294
  return {"response": response}
295
 
296
 
297
+ # --------------------------------------------------------------------------- #
298
+ # GRADIO PIPELINE
299
+ # --------------------------------------------------------------------------- #
async def process_audio(audio_path):
    """Run the voice pipeline: recording -> transcript -> answer -> speech.

    Args:
        audio_path: Filesystem path of the recorded audio (may be empty/None).

    Returns:
        A ``(audio_file_path_or_None, message)`` tuple. The path points to a
        temporary ``.mp3`` holding the spoken answer; it is ``None`` whenever
        a stage fails, in which case the message explains what went wrong.
    """
    if not (audio_path and os.path.exists(audio_path)):
        return None, "No valid audio file received."

    try:
        # ---- 1. Read raw bytes --------------------------------------------
        with open(audio_path, "rb") as recording:
            audio_bytes = recording.read()
        if not audio_bytes:
            return None, "Empty audio file."

        # ---- 2. Transcribe (blocking client, so use the thread pool) ------
        user_text = await run_in_threadpool(transcribe_audio, audio_path, audio_bytes)
        if not user_text:
            return None, "Couldn't understand audio. Try again."
        logging.info(f"User: {user_text}")

        # ---- 3. Get the AI response ---------------------------------------
        agent_response = await get_agent_response(user_text)
        if not agent_response:
            return None, "No response generated."
        logging.info(f"AI: {agent_response[:100]}...")

        conversation = f"**You:** {user_text}\n\n**AI:** {agent_response}"

        # ---- 4. Text-to-speech --------------------------------------------
        ai_audio_bytes = await run_in_threadpool(generate_elevenlabs_sync, agent_response)
        if not ai_audio_bytes:
            # Still show the text answer when speech synthesis fails.
            logging.error("TTS failed returning text only.")
            return None, conversation + "\n\n_(Audio generation failed)_"

        # delete=False: the file has to outlive this function so that Gradio
        # can serve it from the returned path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as speech_file:
            speech_file.write(ai_audio_bytes)
            out_path = speech_file.name
        logging.info(f"Saved TTS audio to {out_path}")

        return out_path, conversation

    except Exception as err:
        logging.error(f"Audio processing error: {err}", exc_info=True)
        return None, f"Error: {err}"
345
 
346
 
347
+ # --------------------------------------------------------------------------- #
348
+ # GRADIO UI
349
+ # --------------------------------------------------------------------------- #
350
  with gr.Blocks(title="Real Estate AI") as demo:
351
  gr.Markdown("# Real Estate Voice Assistant")
352
  gr.Markdown("Ask about projects in Pune, Mumbai, Bengaluru, etc.")
 
357
 
358
  out_text = gr.Textbox(label="Conversation", lines=8)
359
 
360
+ inp.change(process_audio, inputs=inp, outputs=[out_audio, out_text])
361
 
362
+ # No examples: they caused FileNotFoundError with text inputs.
 
363
 
364
 
365
+ # --------------------------------------------------------------------------- #
366
+ # MOUNT GRADIO
367
+ # --------------------------------------------------------------------------- #
368
  app = gr.mount_gradio_app(app, demo, path="/")