WanIrfan committed on
Commit
4a20c7a
·
verified ·
1 Parent(s): 6078322

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +162 -305
app.py CHANGED
@@ -1,4 +1,5 @@
1
  from flask import Flask, request, render_template, session, url_for, redirect, jsonify
 
2
  from langchain_core.messages import HumanMessage, AIMessage
3
  from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
4
  import os
@@ -10,10 +11,9 @@ import shutil
10
  import zipfile
11
  from dotenv import load_dotenv
12
  from huggingface_hub import hf_hub_download
 
13
 
14
  # --- Core Application Imports ---
15
- # Make sure you have an empty __init__.py file in your 'src' folder
16
-
17
  from src.medical_swarm import run_medical_swarm
18
  from src.utils import load_rag_system, standardize_query, get_standalone_question, parse_agent_response, markdown_bold_to_html
19
  from langchain_google_genai import ChatGoogleGenerativeAI
@@ -25,74 +25,64 @@ logger = logging.getLogger(__name__)
25
  # Load environment variables
26
  load_dotenv()
27
 
28
- # --- 1. DATABASE SETUP FUNCTION (For Deployment) ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def setup_database():
30
  """Downloads and unzips the ChromaDB folder from Hugging Face Datasets."""
31
-
32
- # --- !!! IMPORTANT !!! ---
33
- # YOU MUST CHANGE THIS to your Hugging Face Dataset repo ID
34
- # For example: "your_username/your_database_repo_name"
35
  DATASET_REPO_ID = "WanIrfan/atlast-db"
36
- # -------------------------
37
-
38
  ZIP_FILENAME = "chroma_db.zip"
39
  DB_DIR = "chroma_db"
40
-
41
  if os.path.exists(DB_DIR) and os.listdir(DB_DIR):
42
  logger.info("✅ Database directory already exists. Skipping download.")
43
  return
44
-
45
  logger.info(f"📥 Downloading database from HF Hub: {DATASET_REPO_ID}")
46
  try:
47
- zip_path = hf_hub_download(
48
- repo_id=DATASET_REPO_ID,
49
- filename=ZIP_FILENAME,
50
- repo_type="dataset",
51
- # You might need to add your HF token to secrets if the dataset is private
52
- # token=os.getenv("HF_TOKEN")
53
- )
54
-
55
  logger.info(f"📦 Unzipping database from {zip_path}...")
56
  with zipfile.ZipFile(zip_path, 'r') as zip_ref:
57
- zip_ref.extractall(".") # Extracts to the root, creating ./chroma_db
58
-
59
  logger.info("✅ Database setup complete!")
60
-
61
- # Clean up the downloaded zip file to save space
62
  if os.path.exists(zip_path):
63
  os.remove(zip_path)
64
-
65
  except Exception as e:
66
  logger.error(f"❌ CRITICAL ERROR setting up database: {e}", exc_info=True)
67
- # This will likely cause the RAG system to fail loading, which is expected
68
- # if the database isn't available.
69
 
70
  # --- RUN DATABASE SETUP *BEFORE* INITIALIZING THE APP ---
71
  setup_database()
72
 
73
-
74
  # --- STANDARD FLASK APP INITIALIZATION ---
75
  app = Flask(__name__)
76
  app.secret_key = "a_really_strong_static_secret_key_12345"
77
- # Configure cookie-based sessions with larger payload
78
- app.config['SESSION_COOKIE_SECURE'] = False # Set True if using HTTPS
79
- app.config['SESSION_COOKIE_HTTPONLY'] = True
80
- app.config['SESSION_COOKIE_SAMESITE'] = 'Lax'
81
- app.config['PERMANENT_SESSION_LIFETIME'] = 3600 # 1 hour
82
- app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16MB max
83
- # # --- CONFIGURE SERVER-SIDE SESSIONS ---
84
- # app.config["SESSION_PERMANENT"] = False
85
- # app.config["SESSION_TYPE"] = "filesystem"
86
- # app.config["SESSION_FILE_DIR"] = "/app/flask_session" # Explicitly tell Flask where to write
87
- # Session(app)
88
 
89
  google_api_key = os.getenv("GOOGLE_API_KEY")
90
  if not google_api_key:
91
- logger.warning("⚠️ GOOGLE_API_KEY not found in environment variables. LLM calls will fail.")
92
  else:
93
  logger.info("GOOGLE_API_KEY loaded successfully.")
94
 
95
- # Initialize LLM
96
  llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0.05, google_api_key=google_api_key)
97
 
98
  # --- LOAD RAG SYSTEMS (AFTER DB SETUP) ---
@@ -104,410 +94,251 @@ try:
104
  'insurance': load_rag_system(collection_name="etiqa_Agentic_retrieval", domain="insurance")
105
  }
106
  except Exception as e:
107
- logger.error(f"❌ FAILED to load RAG systems. Check database path and permissions. Error: {e}", exc_info=True)
108
  rag_systems = {'medical': None, 'islamic': None, 'insurance': None}
109
 
110
- # Store systems and LLM on the app for blueprints
111
  app.rag_systems = rag_systems
112
  app.llm = llm
113
 
114
-
115
- # Check initialization status
116
  logger.info("\n📊 SYSTEM STATUS:")
117
  for domain, system in rag_systems.items():
118
  status = "✅ Ready" if system else "❌ Failed (DB missing?)"
119
  logger.info(f" {domain}: {status}")
120
 
121
- def hydrate_history(raw_history_list: list) -> list:
122
- """Converts a list of dicts from session back into LangChain Message objects."""
123
- history = []
124
- if not raw_history_list:
125
- return history
126
- for item in raw_history_list:
127
- if item.get('type') == 'human':
128
- history.append(HumanMessage(content=item.get('content', '')))
129
- elif item.get('type') == 'ai':
130
- history.append(AIMessage(content=item.get('content', '')))
131
- return history
132
-
133
- def dehydrate_history(history_messages: list) -> list:
134
- """Converts LangChain Message objects into a JSON-serializable list of dicts."""
135
- raw_list = []
136
- for msg in history_messages:
137
- if isinstance(msg, HumanMessage):
138
- raw_list.append({'type': 'human', 'content': msg.content})
139
- elif isinstance(msg, AIMessage):
140
- raw_list.append({'type': 'ai', 'content': msg.content})
141
- return raw_list
142
-
143
- # --- FLASK ROUTES ---
144
-
145
  @app.route("/")
146
  def homePage():
147
- # Clear all session history when visiting the home page
148
- session.pop('medical_history', None)
149
- session.pop('islamic_history', None)
150
- session.pop('insurance_history', None)
151
- session.pop('current_medical_document', None)
152
  return render_template("homePage.html")
153
 
154
-
155
  @app.route("/medical", methods=["GET", "POST"])
156
  def medical_page():
157
  if request.method == "GET":
158
- # USE .get() instead of .pop() - don't remove it yet
159
- latest_response = session.get('latest_medical_response', {})
160
-
161
- answer = latest_response.get('answer', "")
162
- thoughts = latest_response.get('thoughts', "")
163
- validation = latest_response.get('validation', "")
164
- source = latest_response.get('source', "")
165
-
166
- # ✅ NOW clear it after reading (for next request)
167
- if latest_response:
168
- session.pop('latest_medical_response', None)
169
- session.modified = True
170
-
171
- # Load history
172
- raw_history_list = session.get('medical_history', [])
173
- history = hydrate_history(raw_history_list)
174
-
175
  return render_template("medical_page.html",
176
- history=history, # ✅ Pass hydrated history
177
- answer=answer,
178
- thoughts=thoughts,
179
- validation=validation,
180
- source=source)
181
 
182
- # POST Request
183
  answer, thoughts, validation, source = "", "", "", ""
184
  raw_history_list = session.get('medical_history', [])
185
  history_for_agent = hydrate_history(raw_history_list)
186
  current_medical_document = session.get('current_medical_document', "")
 
187
 
188
  try:
189
- query = standardize_query(request.form.get("query", ""))
190
  has_image = 'image' in request.files and request.files['image'].filename
191
  has_document = 'document' in request.files and request.files['document'].filename
192
- has_query = request.form.get("query") or request.form.get("question", "")
193
-
194
- logger.info(f"POST request received: has_image={has_image}, has_document={has_document}, has_query={has_query}")
195
-
196
  if has_document:
197
- logger.info("Processing Scenario 3: Query + Document with Medical Swarm")
198
  file = request.files['document']
199
- try:
200
- document_text = file.read().decode("utf-8")
201
- session['current_medical_document'] = document_text
202
- current_medical_document = document_text
203
- except UnicodeDecodeError:
204
- answer = "Error: Could not decode the uploaded document. Please ensure it is a valid text or PDF file."
205
- logger.error("Scenario 3: Document decode error")
206
- thoughts = traceback.format_exc()
207
-
208
  swarm_answer = run_medical_swarm(current_medical_document, query)
209
  answer = markdown_bold_to_html(swarm_answer)
210
-
211
- thoughts = "Swarm analysis complete. The process is orchestrated and does not use the ReAct thought process. You can now ask follow-up questions."
212
  source = "Medical Swarm"
213
- validation = "Swarm output generated."
214
-
215
  history_for_agent.append(HumanMessage(content=f"[Document Uploaded] Query: '{query}'"))
216
  history_for_agent.append(AIMessage(content=answer))
217
-
218
- elif has_image:
219
  logger.info("Processing Multimodal RAG: Query + Image")
220
  file = request.files['image']
221
  upload_dir = "Uploads"
222
  os.makedirs(upload_dir, exist_ok=True)
223
  image_path = os.path.join(upload_dir, file.filename)
224
-
225
  try:
226
- file.save(image_path)
227
- file.close()
228
-
229
  with open(image_path, "rb") as img_file:
230
  img_data = base64.b64encode(img_file.read()).decode("utf-8")
231
-
232
- vision_prompt = f"Analyze this image and identify the main subject in a single, concise sentence. The user's query is: '{query}'"
233
- message = HumanMessage(content=[
234
- {"type": "text", "text": vision_prompt},
235
- {"type": "image_url", "image_url": f"data:image/jpeg;base64,{img_data}"}
236
- ])
237
- vision_response = llm.invoke([message])
238
- visual_prediction = vision_response.content
239
- logger.info(f"Vision Prediction: {visual_prediction}")
240
-
241
- enhanced_query = (
242
- f'User Query: "{query}" '
243
- f'Context from an image provided by the LLM: "{visual_prediction}" '
244
- 'Based on the user\'s query and the context from LLM, provide a comprehensive answer.'
245
- )
246
- logger.info(f"Enhanced query: {enhanced_query}")
247
-
248
  agent = rag_systems['medical']
249
- if not agent:
250
- raise Exception("Medical RAG system is not loaded.")
251
-
252
  response_dict = agent.answer(enhanced_query, chat_history=history_for_agent)
253
  answer, thoughts, validation, source = parse_agent_response(response_dict)
254
-
255
- history_for_agent.append(HumanMessage(content=query))
256
  history_for_agent.append(AIMessage(content=answer))
257
-
258
  finally:
259
  if os.path.exists(image_path):
260
- try:
261
- os.remove(image_path)
262
- logger.info(f"Successfully deleted temporary image file: {image_path}")
263
- except PermissionError as e:
264
- logger.warning(f"Could not remove {image_path}: {e}")
265
 
266
  elif query:
267
  history_doc_context = history_for_agent
268
  if current_medical_document:
269
- logger.info("Processing Follow-up Query for Document")
270
- history_doc_context = [HumanMessage(content=f"We are discussing this document:\n{current_medical_document}")] + history_for_agent
271
  else:
272
  logger.info("Processing Text RAG query for Medical domain")
273
 
274
- logger.info(f"Original Query: '{query}'")
275
  standalone_query = get_standalone_question(query, history_doc_context, llm)
276
- logger.info(f"Standalone Query: '{standalone_query}'")
277
-
278
  agent = rag_systems['medical']
279
- if not agent:
280
- raise Exception("Medical RAG system is not loaded.")
281
-
282
  response_dict = agent.answer(standalone_query, chat_history=history_doc_context)
283
  answer, thoughts, validation, source = parse_agent_response(response_dict)
284
-
285
  history_for_agent.append(HumanMessage(content=query))
286
  history_for_agent.append(AIMessage(content=answer))
287
-
288
- else:
289
- raise ValueError("No query or file provided.")
290
-
291
  except Exception as e:
292
  logger.error(f"Error on /medical page: {e}", exc_info=True)
293
  answer = f"An error occurred: {e}"
294
  thoughts = traceback.format_exc()
 
 
 
 
295
 
296
- # ✅ DEHYDRATE history back to dicts
297
  session['medical_history'] = dehydrate_history(history_for_agent)
298
-
299
- # ✅ Save the response
300
- session['latest_medical_response'] = {
301
- 'answer': answer,
302
- 'thoughts': thoughts,
303
- 'validation': validation,
304
- 'source': source
305
- }
306
  session.modified = True
307
-
308
- # ✅ ADD DEBUG LOG
309
- logger.info(f"💾 SAVED TO SESSION - Answer length: {len(answer)}, First 100 chars: {answer[:100]}")
310
- logger.info(f"💾 Session ID: {session.get('_id', 'NO ID')}")
311
- logger.info(f"💾 History length: {len(history_for_agent)}")
312
 
 
313
  return redirect(url_for('medical_page'))
314
 
315
  @app.route("/medical/clear")
316
  def clear_medical_chat():
317
  session.pop('medical_history', None)
318
  session.pop('current_medical_document', None)
319
- logger.info("Medical chat history cleared.")
320
  return redirect(url_for('medical_page'))
321
 
 
322
  @app.route("/islamic", methods=["GET", "POST"])
323
  def islamic_page():
324
- #Use session
325
-
326
  if request.method == "GET":
327
- # Load all latest data from session (or default to empty if not found)
328
- latest_response = session.pop('latest_islamic_response', {}) # POP to clear it after one display
329
-
330
- answer = latest_response.get('answer', "")
331
- thoughts = latest_response.get('thoughts', "")
332
- validation = latest_response.get('validation', "")
333
- source = latest_response.get('source', "")
334
-
335
- # Clear history only when a user first navigates (no latest_response and no current history)
336
- if not latest_response and 'islamic_history' not in session:
337
- session.pop('islamic_history', None)
338
-
339
- return render_template("islamic_page.html",
340
  history=session.get('islamic_history', []),
341
- answer=answer,
342
- thoughts=thoughts,
343
- validation=validation,
344
- source=source)
345
 
346
- # POST Request Logic
347
  answer, thoughts, validation, source = "", "", "", ""
348
- history = session.get('islamic_history', [])
349
-
350
- # This try/except block wraps the ENTIRE POST logic
351
  try:
352
  query = standardize_query(request.form.get("query", ""))
353
  has_image = 'image' in request.files and request.files['image'].filename
354
-
355
- final_query = query # Default to the original query
 
356
 
357
  if has_image:
358
  logger.info("Processing Multimodal RAG query for Islamic domain")
359
-
360
  file = request.files['image']
361
-
362
  upload_dir = "Uploads"
363
  os.makedirs(upload_dir, exist_ok=True)
364
  image_path = os.path.join(upload_dir, file.filename)
365
-
366
  try:
367
- file.save(image_path)
368
- file.close()
369
-
370
  with open(image_path, "rb") as img_file:
371
  img_base64 = base64.b64encode(img_file.read()).decode("utf-8")
372
-
373
- vision_prompt = f"Analyze this image's main subject. User's query is: '{query}'"
374
  message = HumanMessage(content=[{"type": "text", "text": vision_prompt}, {"type": "image_url", "image_url": f"data:image/jpeg;base64,{img_base64}"}])
375
  visual_prediction = llm.invoke([message]).content
376
-
377
- enhanced_query = (
378
- f'User Query: "{query}" '
379
- f'Context from an image provided by the LLM: "{visual_prediction}" '
380
- 'Based on the user\'s query and the context from LLM, provide a comprehensive answer.'
381
- )
382
- logger.info(f"Create enchanced query : {enhanced_query}")
383
-
384
- final_query = enhanced_query
385
-
386
  finally:
387
  if os.path.exists(image_path):
388
- try:
389
- os.remove(image_path)
390
- logger.info(f"Successfully cleaned up {image_path}")
391
- except PermissionError as e:
392
- logger.warning(f"Could not remove {image_path} after processing. "
393
- f"File may be locked. Error: {e}")
394
-
395
- elif query: # Only run text logic if there's a query and no image
396
  logger.info("Processing Text RAG query for Islamic domain")
397
- standalone_query = get_standalone_question(query, history,llm)
398
- logger.info(f"Original Query: '{query}'")
399
- print(f"📚 Using chat history with {len(history)} previous messages to create standalone query")
400
- logger.info(f"Standalone Query: '{standalone_query}'")
401
- final_query = standalone_query
402
 
403
- if not final_query:
404
- raise ValueError("No query or file provided.")
405
-
406
  agent = rag_systems['islamic']
407
  if not agent: raise Exception("Islamic RAG system is not loaded.")
408
- response_dict = agent.answer(final_query, chat_history=history)
409
- answer, thoughts , validation, source = parse_agent_response(response_dict)
410
- history.append(HumanMessage(content=query))
411
- history.append(AIMessage(content=answer))
412
 
413
  except Exception as e:
414
  logger.error(f"Error on /islamic page: {e}", exc_info=True)
415
- answer = f"An error occurred: {e}"
416
- thoughts = traceback.format_exc()
417
-
418
- # Save updated history and LATEST RESPONSE DATA back to the session
419
- session['islamic_history'] = history
420
- session['latest_islamic_response'] = {
421
- 'answer': answer,
422
- 'thoughts': thoughts,
423
- 'validation': validation,
424
- 'source': source
425
- }
426
  session.modified = True
427
- # --- ADD THIS DEBUG LINE ---
428
- logger.info(f"DEBUG: Saving to session: ANSWER='{answer[:50]}...', THOUGHTS='{thoughts[:50]}...'")
429
- logger.debug(f"Redirecting after saving latest response.")
430
  return redirect(url_for('islamic_page'))
431
 
432
  @app.route("/islamic/clear")
433
  def clear_islamic_chat():
434
  session.pop('islamic_history', None)
435
- logger.info("Islamic chat history cleared.")
436
  return redirect(url_for('islamic_page'))
437
 
 
438
  @app.route("/insurance", methods=["GET", "POST"])
439
  def insurance_page():
440
  if request.method == "GET" :
441
  latest_response = session.pop('latest_insurance_response',{})
442
-
443
- answer = latest_response.get('answer', "")
444
- thoughts = latest_response.get('thoughts', "")
445
- validation = latest_response.get('validation', "")
446
- source = latest_response.get('source', "")
447
-
448
- if not latest_response and 'insurance_history' not in session:
449
- session.pop('insurance_history', None)
450
-
451
- return render_template("insurance_page.html", # You will need to create this HTML file
452
  history=session.get('insurance_history', []),
453
- answer=answer,
454
- thoughts=thoughts,
455
- validation=validation,
456
- source=source)
457
 
458
- # POST Request Logic
459
  answer, thoughts, validation, source = "", "", "", ""
460
- history = session.get('insurance_history', [])
461
-
 
462
  try:
463
  query = standardize_query(request.form.get("query", ""))
464
-
465
- if query:
466
- logger.info("Processing Text RAG query for Insurance domain")
467
- standalone_query = get_standalone_question(query, history, llm)
468
- logger.info(f"Original Query: '{query}'")
469
- logger.info(f"Standalone Query: '{standalone_query}'")
470
-
471
- agent = rag_systems['insurance']
472
- if not agent: raise Exception("Insurance RAG system is not loaded.")
473
- response_dict = agent.answer(standalone_query, chat_history=history)
474
- answer, thoughts, validation, source = parse_agent_response(response_dict)
475
-
476
- history.append(HumanMessage(content=query))
477
- history.append(AIMessage(content=answer))
478
- else:
479
  raise ValueError("No query provided.")
 
 
 
 
 
 
 
 
 
480
 
481
  except Exception as e:
482
  logger.error(f"Error on /insurance page: {e}", exc_info=True)
483
- answer = f"An error occurred: {e}"
484
- thoughts = traceback.format_exc()
485
-
486
- session['insurance_history'] = history
487
- session['latest_insurance_response'] = {
488
- 'answer': answer,
489
- 'thoughts': thoughts,
490
- 'validation': validation,
491
- 'source': source
492
- }
493
  session.modified = True
494
-
495
  logger.debug(f"Redirecting after saving latest response.")
496
  return redirect(url_for('insurance_page'))
497
 
498
  @app.route("/insurance/clear")
499
  def clear_insurance_chat():
500
  session.pop('insurance_history', None)
501
- logger.info("Insurance chat history cleared.")
502
  return redirect(url_for('insurance_page'))
503
 
504
  @app.route("/about", methods=["GET"])
505
  def about():
506
  return render_template("about.html")
507
 
 
508
  @app.route('/metrics/<domain>')
509
  def get_metrics(domain):
510
- """API endpoint to get metrics for a specific domain."""
511
  try:
512
  if domain == "medical" and rag_systems['medical']:
513
  stats = rag_systems['medical'].metrics_tracker.get_stats()
@@ -519,14 +350,12 @@ def get_metrics(domain):
519
  return jsonify({"error": f"{domain} RAG system not loaded"}), 500
520
  else:
521
  return jsonify({"error": "Invalid domain"}), 400
522
-
523
  return jsonify(stats)
524
  except Exception as e:
525
  return jsonify({"error": str(e)}), 500
526
 
527
  @app.route('/metrics/reset/<domain>', methods=['POST'])
528
  def reset_metrics(domain):
529
- """Reset metrics for a domain (useful for testing)."""
530
  try:
531
  if domain == "medical" and rag_systems['medical']:
532
  rag_systems['medical'].metrics_tracker.reset_metrics()
@@ -538,12 +367,40 @@ def reset_metrics(domain):
538
  return jsonify({"error": f"{domain} RAG system not loaded"}), 500
539
  else:
540
  return jsonify({"error": "Invalid domain"}), 400
541
-
542
  return jsonify({"success": True, "message": f"Metrics reset for {domain}"})
543
  except Exception as e:
544
  return jsonify({"error": str(e)}), 500
545
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
546
  if __name__ == "__main__":
547
  logger.info("Starting Flask app for deployment testing...")
548
- # This port 7860 is what Hugging Face Spaces expects by default
549
  app.run(host="0.0.0.0", port=7860, debug=False)
 
1
  from flask import Flask, request, render_template, session, url_for, redirect, jsonify
2
+ # from flask_session import Session <--- REMOVED
3
  from langchain_core.messages import HumanMessage, AIMessage
4
  from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
5
  import os
 
11
  import zipfile
12
  from dotenv import load_dotenv
13
  from huggingface_hub import hf_hub_download
14
+ from PIL import Image
15
 
16
  # --- Core Application Imports ---
 
 
17
  from src.medical_swarm import run_medical_swarm
18
  from src.utils import load_rag_system, standardize_query, get_standalone_question, parse_agent_response, markdown_bold_to_html
19
  from langchain_google_genai import ChatGoogleGenerativeAI
 
25
  # Load environment variables
26
  load_dotenv()
27
 
28
+ # --- 1. NEW HELPER FUNCTIONS TO FIX 'TypeError' ---
29
+ def hydrate_history(raw_history_list: list) -> list:
30
+ """Converts a list of dicts from session back into LangChain Message objects."""
31
+ history = []
32
+ if not raw_history_list:
33
+ return history
34
+ for item in raw_history_list:
35
+ if item.get('type') == 'human':
36
+ history.append(HumanMessage(content=item.get('content', '')))
37
+ elif item.get('type') == 'ai':
38
+ history.append(AIMessage(content=item.get('content', '')))
39
+ return history
40
+
41
+ def dehydrate_history(history_messages: list) -> list:
42
+ """Converts LangChain Message objects into a JSON-serializable list of dicts."""
43
+ raw_list = []
44
+ for msg in history_messages:
45
+ if isinstance(msg, HumanMessage):
46
+ raw_list.append({'type': 'human', 'content': msg.content})
47
+ elif isinstance(msg, AIMessage):
48
+ raw_list.append({'type': 'ai', 'content': msg.content})
49
+ return raw_list
50
+
51
+ # --- 2. DATABASE SETUP FUNCTION (For Deployment) ---
52
  def setup_database():
53
  """Downloads and unzips the ChromaDB folder from Hugging Face Datasets."""
 
 
 
 
54
  DATASET_REPO_ID = "WanIrfan/atlast-db"
 
 
55
  ZIP_FILENAME = "chroma_db.zip"
56
  DB_DIR = "chroma_db"
 
57
  if os.path.exists(DB_DIR) and os.listdir(DB_DIR):
58
  logger.info("✅ Database directory already exists. Skipping download.")
59
  return
 
60
  logger.info(f"📥 Downloading database from HF Hub: {DATASET_REPO_ID}")
61
  try:
62
+ zip_path = hf_hub_download(repo_id=DATASET_REPO_ID, filename=ZIP_FILENAME, repo_type="dataset")
 
 
 
 
 
 
 
63
  logger.info(f"📦 Unzipping database from {zip_path}...")
64
  with zipfile.ZipFile(zip_path, 'r') as zip_ref:
65
+ zip_ref.extractall(".")
 
66
  logger.info("✅ Database setup complete!")
 
 
67
  if os.path.exists(zip_path):
68
  os.remove(zip_path)
 
69
  except Exception as e:
70
  logger.error(f"❌ CRITICAL ERROR setting up database: {e}", exc_info=True)
 
 
71
 
72
  # --- RUN DATABASE SETUP *BEFORE* INITIALIZING THE APP ---
73
  setup_database()
74
 
 
75
  # --- STANDARD FLASK APP INITIALIZATION ---
76
  app = Flask(__name__)
77
  app.secret_key = "a_really_strong_static_secret_key_12345"
78
+ # --- REMOVED flask_session CONFIG ---
 
 
 
 
 
 
 
 
 
 
79
 
80
  google_api_key = os.getenv("GOOGLE_API_KEY")
81
  if not google_api_key:
82
+ logger.warning("⚠️ GOOGLE_API_KEY not found.")
83
  else:
84
  logger.info("GOOGLE_API_KEY loaded successfully.")
85
 
 
86
  llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0.05, google_api_key=google_api_key)
87
 
88
  # --- LOAD RAG SYSTEMS (AFTER DB SETUP) ---
 
94
  'insurance': load_rag_system(collection_name="etiqa_Agentic_retrieval", domain="insurance")
95
  }
96
  except Exception as e:
97
+ logger.error(f"❌ FAILED to load RAG systems. Error: {e}", exc_info=True)
98
  rag_systems = {'medical': None, 'islamic': None, 'insurance': None}
99
 
 
100
  app.rag_systems = rag_systems
101
  app.llm = llm
102
 
 
 
103
  logger.info("\n📊 SYSTEM STATUS:")
104
  for domain, system in rag_systems.items():
105
  status = "✅ Ready" if system else "❌ Failed (DB missing?)"
106
  logger.info(f" {domain}: {status}")
107
 
108
+ # --- FLASK WEB UI ROUTES ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  @app.route("/")
110
  def homePage():
111
+ session.clear() # Clear all keys
 
 
 
 
112
  return render_template("homePage.html")
113
 
114
+ # --- MEDICAL PAGE ---
115
  @app.route("/medical", methods=["GET", "POST"])
116
  def medical_page():
117
  if request.method == "GET":
118
+ latest_response = session.pop('latest_medical_response', {})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  return render_template("medical_page.html",
120
+ history=session.get('medical_history', []),
121
+ answer=latest_response.get('answer', ""),
122
+ thoughts=latest_response.get('thoughts', ""),
123
+ validation=latest_response.get('validation', ""),
124
+ source=latest_response.get('source', ""))
125
 
 
126
  answer, thoughts, validation, source = "", "", "", ""
127
  raw_history_list = session.get('medical_history', [])
128
  history_for_agent = hydrate_history(raw_history_list)
129
  current_medical_document = session.get('current_medical_document', "")
130
+ query = ""
131
 
132
  try:
133
+ query=standardize_query(request.form.get("query", ""))
134
  has_image = 'image' in request.files and request.files['image'].filename
135
  has_document = 'document' in request.files and request.files['document'].filename
136
+
137
+ if not (query or has_image or has_document):
138
+ raise ValueError("No query or file provided.")
139
+
140
  if has_document:
141
+ logger.info("Processing Document with Medical Swarm")
142
  file = request.files['document']
143
+ document_text = file.read().decode("utf-8")
144
+ session['current_medical_document'] = document_text
145
+ current_medical_document = document_text
 
 
 
 
 
 
146
  swarm_answer = run_medical_swarm(current_medical_document, query)
147
  answer = markdown_bold_to_html(swarm_answer)
148
+ thoughts = "Swarm analysis complete."
149
+ validation = (True, "Swarm output generated.")
150
  source = "Medical Swarm"
 
 
151
  history_for_agent.append(HumanMessage(content=f"[Document Uploaded] Query: '{query}'"))
152
  history_for_agent.append(AIMessage(content=answer))
153
+
154
+ elif has_image :
155
  logger.info("Processing Multimodal RAG: Query + Image")
156
  file = request.files['image']
157
  upload_dir = "Uploads"
158
  os.makedirs(upload_dir, exist_ok=True)
159
  image_path = os.path.join(upload_dir, file.filename)
 
160
  try:
161
+ file.save(image_path); file.close()
 
 
162
  with open(image_path, "rb") as img_file:
163
  img_data = base64.b64encode(img_file.read()).decode("utf-8")
164
+ vision_prompt = f"Analyze image. Query: '{query}'"
165
+ message = HumanMessage(content=[{"type": "text", "text": vision_prompt}, {"type": "image_url", "image_url": f"data:image/jpeg;base64,{img_data}"}])
166
+ visual_prediction = llm.invoke([message]).content
167
+ enhanced_query = (f'User Query: "{query}" Context from Image: "{visual_prediction}"')
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  agent = rag_systems['medical']
169
+ if not agent: raise Exception("Medical RAG system not loaded.")
 
 
170
  response_dict = agent.answer(enhanced_query, chat_history=history_for_agent)
171
  answer, thoughts, validation, source = parse_agent_response(response_dict)
172
+ history_for_agent.append(HumanMessage(content=query + " [Image Attached]"))
 
173
  history_for_agent.append(AIMessage(content=answer))
 
174
  finally:
175
  if os.path.exists(image_path):
176
+ try: os.remove(image_path)
177
+ except Exception as e: logger.warning(f"Could not remove {image_path}. Error: {e}")
 
 
 
178
 
179
  elif query:
180
  history_doc_context = history_for_agent
181
  if current_medical_document:
182
+ history_doc_context = [HumanMessage(content=f"Document Context:\n{current_medical_document}")] + history_for_agent
 
183
  else:
184
  logger.info("Processing Text RAG query for Medical domain")
185
 
 
186
  standalone_query = get_standalone_question(query, history_doc_context, llm)
 
 
187
  agent = rag_systems['medical']
188
+ if not agent: raise Exception("Medical RAG system not loaded.")
 
 
189
  response_dict = agent.answer(standalone_query, chat_history=history_doc_context)
190
  answer, thoughts, validation, source = parse_agent_response(response_dict)
 
191
  history_for_agent.append(HumanMessage(content=query))
192
  history_for_agent.append(AIMessage(content=answer))
193
+
 
 
 
194
  except Exception as e:
195
  logger.error(f"Error on /medical page: {e}", exc_info=True)
196
  answer = f"An error occurred: {e}"
197
  thoughts = traceback.format_exc()
198
+ validation = (False, "Exception")
199
+ source = "Application Error"
200
+ history_for_agent.append(HumanMessage(content=query if query else "Failed request"))
201
+ history_for_agent.append(AIMessage(content=answer))
202
 
 
203
  session['medical_history'] = dehydrate_history(history_for_agent)
204
+ session['latest_medical_response'] = {'answer': answer, 'thoughts': thoughts, 'validation': validation, 'source': source}
 
 
 
 
 
 
 
205
  session.modified = True
 
 
 
 
 
206
 
207
+ logger.info(f"DEBUG: Saving to session: ANSWER='{answer[:50]}...'")
208
  return redirect(url_for('medical_page'))
209
 
210
  @app.route("/medical/clear")
211
  def clear_medical_chat():
212
  session.pop('medical_history', None)
213
  session.pop('current_medical_document', None)
 
214
  return redirect(url_for('medical_page'))
215
 
216
+ # --- ISLAMIC PAGE ---
217
  @app.route("/islamic", methods=["GET", "POST"])
218
  def islamic_page():
 
 
219
  if request.method == "GET":
220
+ latest_response = session.pop('latest_islamic_response', {})
221
+ return render_template("islamic_page.html",
 
 
 
 
 
 
 
 
 
 
 
222
  history=session.get('islamic_history', []),
223
+ answer=latest_response.get('answer', ""),
224
+ thoughts=latest_response.get('thoughts', ""),
225
+ validation=latest_response.get('validation', ""),
226
+ source=latest_response.get('source', ""))
227
 
 
228
  answer, thoughts, validation, source = "", "", "", ""
229
+ raw_history_list = session.get('islamic_history', [])
230
+ history_for_agent = hydrate_history(raw_history_list)
231
+ query = ""
232
  try:
233
  query = standardize_query(request.form.get("query", ""))
234
  has_image = 'image' in request.files and request.files['image'].filename
235
+ if not (query or has_image):
236
+ raise ValueError("No query or file provided.")
237
+ final_query = query
238
 
239
  if has_image:
240
  logger.info("Processing Multimodal RAG query for Islamic domain")
 
241
  file = request.files['image']
 
242
  upload_dir = "Uploads"
243
  os.makedirs(upload_dir, exist_ok=True)
244
  image_path = os.path.join(upload_dir, file.filename)
 
245
  try:
246
+ file.save(image_path); file.close()
 
 
247
  with open(image_path, "rb") as img_file:
248
  img_base64 = base64.b64encode(img_file.read()).decode("utf-8")
249
+ vision_prompt = f"Analyze image. Query: '{query}'"
 
250
  message = HumanMessage(content=[{"type": "text", "text": vision_prompt}, {"type": "image_url", "image_url": f"data:image/jpeg;base64,{img_base64}"}])
251
  visual_prediction = llm.invoke([message]).content
252
+ final_query = (f'User Query: "{query}" Context from Image: "{visual_prediction}"')
 
 
 
 
 
 
 
 
 
253
  finally:
254
  if os.path.exists(image_path):
255
+ try: os.remove(image_path)
256
+ except Exception as e: logger.warning(f"Could not remove {image_path}. Error: {e}")
257
+ history_for_agent.append(HumanMessage(content=query + " [Image Attached]"))
258
+
259
+ elif query:
 
 
 
260
  logger.info("Processing Text RAG query for Islamic domain")
261
+ final_query = get_standalone_question(query, history_for_agent, llm)
262
+ history_for_agent.append(HumanMessage(content=query))
 
 
 
263
 
 
 
 
264
  agent = rag_systems['islamic']
265
  if not agent: raise Exception("Islamic RAG system is not loaded.")
266
+ response_dict = agent.answer(final_query, chat_history=history_for_agent[:-1])
267
+ answer, thoughts, validation, source = parse_agent_response(response_dict)
268
+ history_for_agent.append(AIMessage(content=answer))
 
269
 
270
  except Exception as e:
271
  logger.error(f"Error on /islamic page: {e}", exc_info=True)
272
+ answer = f"An error occurred: {e}"; thoughts = traceback.format_exc(); validation = (False, "Exception"); source = "Application Error"
273
+ if not (has_image or query): history_for_agent.append(HumanMessage(content="Failed request"))
274
+ else: history_for_agent.append(HumanMessage(content=query))
275
+ history_for_agent.append(AIMessage(content=answer))
276
+
277
+ session['islamic_history'] = dehydrate_history(history_for_agent)
278
+ session['latest_islamic_response'] = {'answer': answer, 'thoughts': thoughts, 'validation': validation, 'source': source}
 
 
 
 
279
  session.modified = True
280
+ logger.info(f"DEBUG: Saving to session: ANSWER='{answer[:50]}...'")
 
 
281
  return redirect(url_for('islamic_page'))
282
 
283
  @app.route("/islamic/clear")
284
  def clear_islamic_chat():
285
  session.pop('islamic_history', None)
 
286
  return redirect(url_for('islamic_page'))
287
 
288
+ # --- INSURANCE PAGE ---
289
  @app.route("/insurance", methods=["GET", "POST"])
290
  def insurance_page():
291
  if request.method == "GET" :
292
  latest_response = session.pop('latest_insurance_response',{})
293
+ return render_template("insurance_page.html",
 
 
 
 
 
 
 
 
 
294
  history=session.get('insurance_history', []),
295
+ answer=latest_response.get('answer', ""),
296
+ thoughts=latest_response.get('thoughts', ""),
297
+ validation=latest_response.get('validation', ""),
298
+ source=latest_response.get('source', ""))
299
 
 
300
  answer, thoughts, validation, source = "", "", "", ""
301
+ raw_history_list = session.get('insurance_history', [])
302
+ history_for_agent = hydrate_history(raw_history_list)
303
+ query = ""
304
  try:
305
  query = standardize_query(request.form.get("query", ""))
306
+ if not query:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  raise ValueError("No query provided.")
308
+
309
+ standalone_query = get_standalone_question(query, history_for_agent, llm)
310
+ agent = rag_systems['insurance']
311
+ if not agent: raise Exception("Insurance RAG system is not loaded.")
312
+
313
+ response_dict = agent.answer(standalone_query, chat_history=history_for_agent)
314
+ answer, thoughts, validation, source = parse_agent_response(response_dict)
315
+ history_for_agent.append(HumanMessage(content=query))
316
+ history_for_agent.append(AIMessage(content=answer))
317
 
318
  except Exception as e:
319
  logger.error(f"Error on /insurance page: {e}", exc_info=True)
320
+ answer = f"An error occurred: {e}"; thoughts = traceback.format_exc(); validation = (False, "Exception"); source = "Application Error"
321
+ history_for_agent.append(HumanMessage(content=query))
322
+ history_for_agent.append(AIMessage(content=answer))
323
+
324
+ session['insurance_history'] = dehydrate_history(history_for_agent)
325
+ session['latest_insurance_response'] = {'answer': answer, 'thoughts': thoughts, 'validation': validation, 'source': source}
 
 
 
 
326
  session.modified = True
 
327
  logger.debug(f"Redirecting after saving latest response.")
328
  return redirect(url_for('insurance_page'))
329
 
330
  @app.route("/insurance/clear")
331
  def clear_insurance_chat():
332
  session.pop('insurance_history', None)
 
333
  return redirect(url_for('insurance_page'))
334
 
335
  @app.route("/about", methods=["GET"])
336
  def about():
337
  return render_template("about.html")
338
 
339
+ # --- METRICS ROUTES ---
340
  @app.route('/metrics/<domain>')
341
  def get_metrics(domain):
 
342
  try:
343
  if domain == "medical" and rag_systems['medical']:
344
  stats = rag_systems['medical'].metrics_tracker.get_stats()
 
350
  return jsonify({"error": f"{domain} RAG system not loaded"}), 500
351
  else:
352
  return jsonify({"error": "Invalid domain"}), 400
 
353
  return jsonify(stats)
354
  except Exception as e:
355
  return jsonify({"error": str(e)}), 500
356
 
357
  @app.route('/metrics/reset/<domain>', methods=['POST'])
358
  def reset_metrics(domain):
 
359
  try:
360
  if domain == "medical" and rag_systems['medical']:
361
  rag_systems['medical'].metrics_tracker.reset_metrics()
 
367
  return jsonify({"error": f"{domain} RAG system not loaded"}), 500
368
  else:
369
  return jsonify({"error": "Invalid domain"}), 400
 
370
  return jsonify({"success": True, "message": f"Metrics reset for {domain}"})
371
  except Exception as e:
372
  return jsonify({"error": str(e)}), 500
373
 
374
+ # --- 3. NEW API-ONLY ROUTES ---
375
+
376
@app.route("/api/medical", methods=["POST"])
def medical_api():
    """Answer a medical question over JSON, without using the session.

    Expects a JSON object body with:
      * ``query``   - the question text (required, non-empty).
      * ``history`` - optional list of prior turns, passed to
        ``hydrate_history`` before being handed to the agent.

    Returns the agent's response dict as JSON on success.
    Returns 400 for a body that is not a JSON object, a missing query, or a
    non-list history; 500 when the medical RAG system is not loaded or the
    agent raises.
    """
    try:
        # silent=True yields None instead of raising on a non-JSON body or
        # content type, so malformed requests get a 400 here rather than
        # falling into the blanket handler below and being reported as 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"error": "Request body must be a JSON object"}), 400

        query = data.get("query")
        if not query:
            return jsonify({"error": "No query provided"}), 400

        # Hydrate history from the JSON payload; an explicit null counts as empty.
        raw_history = data.get("history") or []
        if not isinstance(raw_history, list):
            return jsonify({"error": "'history' must be a list"}), 400
        history_for_agent = hydrate_history(raw_history)

        agent = rag_systems['medical']
        if not agent:
            return jsonify({"error": "Medical RAG system not loaded"}), 500

        # Run the agent
        response_dict = agent.answer(query, chat_history=history_for_agent)

        # Return the agent's response dict as JSON
        return jsonify(response_dict)

    except Exception as e:
        logger.error(f"Error on /api/medical: {e}", exc_info=True)
        return jsonify({"error": str(e)}), 500
401
+
402
+ # (You can easily add /api/islamic and /api/insurance later by copying this)
403
+
404
  if __name__ == "__main__":
405
  logger.info("Starting Flask app for deployment testing...")
 
406
  app.run(host="0.0.0.0", port=7860, debug=False)