WanIrfan committed on
Commit
520ba33
·
verified ·
1 Parent(s): c58cbe4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +513 -517
app.py CHANGED
@@ -1,518 +1,514 @@
1
- from flask import Flask, request, render_template, session, url_for, redirect, jsonify
2
- from flask_session import Session
3
- from langchain_core.messages import HumanMessage, AIMessage
4
- from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
5
- import os
6
- import logging
7
- import re
8
- import traceback
9
- import base64
10
- import shutil
11
- import zipfile
12
- from dotenv import load_dotenv
13
- from huggingface_hub import hf_hub_download
14
- from PIL import Image
15
-
16
- # --- Core Application Imports ---
17
- # Make sure you have an empty __init__.py file in your 'src' folder
18
- from api import api_bp
19
- from src.medical_swarm import run_medical_swarm
20
- from src.utils import load_rag_system, standardize_query, get_standalone_question, parse_agent_response, markdown_bold_to_html
21
- from langchain_google_genai import ChatGoogleGenerativeAI
22
-
23
- # Setup logging
24
- logging.basicConfig(level=logging.DEBUG)
25
- logger = logging.getLogger(__name__)
26
-
27
- # Load environment variables
28
- load_dotenv()
29
-
30
# --- 1. DATABASE SETUP FUNCTION (For Deployment) ---
def setup_database():
    """Fetch the zipped ChromaDB folder from a Hugging Face dataset and extract it locally."""
    # --- !!! IMPORTANT !!! ---
    # Change this to your own Hugging Face Dataset repo ID,
    # e.g. "your_username/your_database_repo_name".
    DATASET_REPO_ID = "WanIrfan/atlast-db"
    # -------------------------

    ZIP_FILENAME = "chroma_db.zip"
    DB_DIR = "chroma_db"

    # Nothing to do when a non-empty database directory is already present.
    if os.path.exists(DB_DIR) and os.listdir(DB_DIR):
        logger.info("✅ Database directory already exists. Skipping download.")
        return

    logger.info(f"📥 Downloading database from HF Hub: {DATASET_REPO_ID}")
    try:
        zip_path = hf_hub_download(
            repo_id=DATASET_REPO_ID,
            filename=ZIP_FILENAME,
            repo_type="dataset",
            # A token may be required when the dataset is private:
            # token=os.getenv("HF_TOKEN")
        )

        logger.info(f"📦 Unzipping database from {zip_path}...")
        with zipfile.ZipFile(zip_path, 'r') as archive:
            archive.extractall(".")  # extracts into the working dir, creating ./chroma_db

        logger.info("✅ Database setup complete!")

        # Drop the downloaded archive to save disk space.
        if os.path.exists(zip_path):
            os.remove(zip_path)

    except Exception as e:
        # Not fatal by itself: the RAG loaders below will simply fail to find
        # the database and report their own errors.
        logger.error(f"❌ CRITICAL ERROR setting up database: {e}", exc_info=True)
71
-
72
# --- RUN DATABASE SETUP *BEFORE* INITIALIZING THE APP ---
setup_database()


# --- STANDARD FLASK APP INITIALIZATION ---
app = Flask(__name__)
app.secret_key = os.urandom(24)  # random per-process key used to sign the session

# --- CONFIGURE SERVER-SIDE SESSIONS ---
app.config["SESSION_PERMANENT"] = False
app.config["SESSION_TYPE"] = "filesystem"
Session(app)

google_api_key = os.getenv("GOOGLE_API_KEY")
if google_api_key:
    logger.info("GOOGLE_API_KEY loaded successfully.")
else:
    logger.warning("⚠️ GOOGLE_API_KEY not found in environment variables. LLM calls will fail.")

# Initialize LLM
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0.05, google_api_key=google_api_key)

# --- LOAD RAG SYSTEMS (AFTER DB SETUP) ---
logger.info("🌟 Starting Multi-Domain AI Assistant...")
try:
    rag_systems = {
        'medical': load_rag_system(collection_name="medical_csv_Agentic_retrieval", domain="medical"),
        'islamic': load_rag_system(collection_name="islamic_texts_Agentic_retrieval", domain="islamic"),
        'insurance': load_rag_system(collection_name="etiqa_Agentic_retrieval", domain="insurance"),
    }
except Exception as e:
    logger.error(f"❌ FAILED to load RAG systems. Check database path and permissions. Error: {e}", exc_info=True)
    rag_systems = {'medical': None, 'islamic': None, 'insurance': None}

# Expose the shared systems and LLM to blueprints via the app object.
app.rag_systems = rag_systems
app.llm = llm

# Register the API blueprint
app.register_blueprint(api_bp)
logger.info(f"✅ API Blueprint registered. API endpoints are now available under /api")

# Check initialization status
logger.info("\n📊 SYSTEM STATUS:")
for domain, system in rag_systems.items():
    status = "✅ Ready" if system else "❌ Failed (DB missing?)"
    logger.info(f" {domain}: {status}")
119
-
120
-
121
- # --- FLASK ROUTES ---
122
-
123
- @app.route("/")
124
- def homePage():
125
- # Clear all session history when visiting the home page
126
- session.pop('medical_history', None)
127
- session.pop('islamic_history', None)
128
- session.pop('insurance_history', None)
129
- session.pop('current_medical_document', None)
130
- return render_template("homePage.html")
131
-
132
-
133
- @app.route("/medical", methods=["GET", "POST"])
134
- def medical_page():
135
- # Use session for history and document context
136
- if request.method == "GET":
137
- # Load all latest data from session (or default to empty if not found)
138
- latest_response = session.pop('latest_medical_response', {}) # POP to clear it after one display
139
-
140
- answer = latest_response.get('answer', "")
141
- thoughts = latest_response.get('thoughts', "")
142
- validation = latest_response.get('validation', "")
143
- source = latest_response.get('source', "")
144
-
145
- # Clear history only when a user first navigates (not on redirect)
146
- if not latest_response and 'medical_history' not in session:
147
- session.pop('current_medical_document', None)
148
-
149
- return render_template("medical_page.html",
150
- history=session.get('medical_history', []),
151
- answer=answer,
152
- thoughts=thoughts,
153
- validation=validation,
154
- source=source)
155
-
156
- # POST Request Logic
157
- answer, thoughts, validation, source = "", "", "", ""
158
- history = session.get('medical_history', [])
159
- current_medical_document = session.get('current_medical_document', "")
160
-
161
-
162
- try:
163
- query=standardize_query(request.form.get("query", ""))
164
- has_image = 'image' in request.files and request.files['image'].filename
165
- has_document = 'document' in request.files and request.files['document'].filename
166
- has_query = request.form.get("query") or request.form.get("question", "")
167
-
168
- logger.info(f"POST request received: has_image={has_image}, has_document={has_document}, has_query={has_query}")
169
-
170
- if has_document:
171
- # Scenario 3: Query + Document
172
- logger.info("Processing Scenario 3: Query + Document with Medical Swarm")
173
- file = request.files['document']
174
- try:
175
- # Store the new document text in the session
176
- document_text = file.read().decode("utf-8")
177
- session['current_medical_document'] = document_text
178
- current_medical_document = document_text # Use the new document for this turn
179
- except UnicodeDecodeError:
180
- answer = "Error: Could not decode the uploaded document. Please ensure it is a valid text or PDF file."
181
- logger.error("Scenario 3: Document decode error")
182
- thoughts = traceback.format_exc()
183
-
184
- swarm_answer = run_medical_swarm(current_medical_document, query)
185
- answer = markdown_bold_to_html(swarm_answer)
186
-
187
- history.append(HumanMessage(content=f"[Document Uploaded] Query: '{query}'"))
188
- history.append(AIMessage(content=swarm_answer))
189
- thoughts = "Swarm analysis complete. The process is orchestrated and does not use the ReAct thought process. You can now ask follow-up questions."
190
- source= "Medical Swarm"
191
- validation = (True, "Swarm output generated.") # Swarm has its own validation logic
192
-
193
- elif has_image :
194
- #Scenario 1
195
- logger.info("Processing Multimodal RAG: Query + Image")
196
- # --- Step 1 & 2: Image Setup & Vision Analysis ---
197
- file = request.files['image']
198
- upload_dir = "Uploads"
199
- os.makedirs(upload_dir, exist_ok=True)
200
- image_path = os.path.join(upload_dir, file.filename)
201
-
202
- try:
203
- file.save(image_path)
204
- file.close()
205
-
206
- with open(image_path, "rb") as img_file:
207
- img_data = base64.b64encode(img_file.read()).decode("utf-8")
208
-
209
-
210
- vision_prompt = f"Analyze this image and identify the main subject in a single, concise sentence. The user's query is: '{query}'"
211
- message = HumanMessage(content=[
212
- {"type": "text", "text": vision_prompt},
213
- {"type": "image_url", "image_url": f"data:image/jpeg;base64,{img_data}"}
214
- ])
215
- vision_response = llm.invoke([message])
216
- visual_prediction = vision_response.content
217
- logger.info(f"Vision Prediction: {visual_prediction}")
218
-
219
- # --- Create an Enhanced Query ---
220
- enhanced_query = (
221
- f'User Query: "{query}" '
222
- f'Context from an image provided by the LLM: "{visual_prediction}" '
223
- 'Based on the user\'s query and the context from LLM, provide a comprehensive answer.'
224
- )
225
- logger.info(f"Enhanced query : {enhanced_query}")
226
-
227
- agent = rag_systems['medical']
228
- if not agent: raise Exception("Medical RAG system is not loaded.")
229
- response_dict = agent.answer(enhanced_query, chat_history=history)
230
- answer, thoughts, validation, source = parse_agent_response(response_dict)
231
- history.append(HumanMessage(content=query))
232
- history.append(AIMessage(content=answer))
233
-
234
- finally:
235
- if os.path.exists(image_path):
236
- try:
237
- os.remove(image_path)
238
- logger.info(f"Successfully deleted temporary image file: {image_path}")
239
- except PermissionError as e:
240
- logger.warning(f"Could not remove {image_path} after processing. "
241
- f"File may be locked by another process. Error: {e}")
242
-
243
- elif query:
244
- # --- SCENARIO 2: TEXT-ONLY QUERY OR SWARM FOLLOW-UP ---
245
- history_for_agent = history
246
- if current_medical_document:
247
- logger.info("Processing Follow-up Query for Document")
248
- history_for_agent = [HumanMessage(content=f"We are discussing this document:\n{current_medical_document}")] + history
249
- else:
250
- logger.info("Processing Text RAG query for Medical domain")
251
-
252
- logger.info(f"Original Query: '{query}'")
253
- print(f"📚 Using chat history with {len(history)} previous messages to create standalone query")
254
- standalone_query = get_standalone_question(query, history_for_agent,llm)
255
- logger.info(f"Standalone Query: '{standalone_query}'")
256
-
257
- agent = rag_systems['medical']
258
- if not agent: raise Exception("Medical RAG system is not loaded.")
259
- response_dict = agent.answer(standalone_query, chat_history=history_for_agent)
260
- answer, thoughts, validation, source = parse_agent_response(response_dict)
261
-
262
- history.append(HumanMessage(content=query))
263
- history.append(AIMessage(content=answer))
264
-
265
- else:
266
- raise ValueError("No query or file provided.")
267
- except Exception as e:
268
- logger.error(f"Error on /medical page: {e}", exc_info=True)
269
- answer = f"An error occurred: {e}"
270
- thoughts = traceback.format_exc()
271
-
272
- # Save updated history and LATEST RESPONSE DATA back to the session
273
- session['medical_history'] = history
274
- session['latest_medical_response'] = {
275
- 'answer': answer,
276
- 'thoughts': thoughts,
277
- 'validation': validation,
278
- 'source': source
279
- }
280
- session.modified = True
281
-
282
- logger.debug(f"Redirecting after saving latest response.")
283
- return redirect(url_for('medical_page'))
284
-
285
- @app.route("/medical/clear")
286
- def clear_medical_chat():
287
- session.pop('medical_history', None)
288
- session.pop('current_medical_document', None)
289
- logger.info("Medical chat history cleared.")
290
- return redirect(url_for('medical_page'))
291
-
292
- @app.route("/islamic", methods=["GET", "POST"])
293
- def islamic_page():
294
- #Use session
295
-
296
- if request.method == "GET":
297
- # Load all latest data from session (or default to empty if not found)
298
- latest_response = session.pop('latest_islamic_response', {}) # POP to clear it after one display
299
-
300
- answer = latest_response.get('answer', "")
301
- thoughts = latest_response.get('thoughts', "")
302
- validation = latest_response.get('validation', "")
303
- source = latest_response.get('source', "")
304
-
305
- # Clear history only when a user first navigates (no latest_response and no current history)
306
- if not latest_response and 'islamic_history' not in session:
307
- session.pop('islamic_history', None)
308
-
309
- return render_template("islamic_page.html",
310
- history=session.get('islamic_history', []),
311
- answer=answer,
312
- thoughts=thoughts,
313
- validation=validation,
314
- source=source)
315
-
316
- # POST Request Logic
317
- answer, thoughts, validation, source = "", "", "", ""
318
- history = session.get('islamic_history', [])
319
-
320
- # This try/except block wraps the ENTIRE POST logic
321
- try:
322
- query = standardize_query(request.form.get("query", ""))
323
- has_image = 'image' in request.files and request.files['image'].filename
324
-
325
- final_query = query # Default to the original query
326
-
327
- if has_image:
328
- logger.info("Processing Multimodal RAG query for Islamic domain")
329
-
330
- file = request.files['image']
331
-
332
- upload_dir = "Uploads"
333
- os.makedirs(upload_dir, exist_ok=True)
334
- image_path = os.path.join(upload_dir, file.filename)
335
-
336
- try:
337
- file.save(image_path)
338
- file.close()
339
-
340
- with open(image_path, "rb") as img_file:
341
- img_base64 = base64.b64encode(img_file.read()).decode("utf-8")
342
-
343
- vision_prompt = f"Analyze this image's main subject. User's query is: '{query}'"
344
- message = HumanMessage(content=[{"type": "text", "text": vision_prompt}, {"type": "image_url", "image_url": f"data:image/jpeg;base64,{img_base64}"}])
345
- visual_prediction = llm.invoke([message]).content
346
-
347
- enhanced_query = (
348
- f'User Query: "{query}" '
349
- f'Context from an image provided by the LLM: "{visual_prediction}" '
350
- 'Based on the user\'s query and the context from LLM, provide a comprehensive answer.'
351
- )
352
- logger.info(f"Create enchanced query : {enhanced_query}")
353
-
354
- final_query = enhanced_query
355
-
356
- finally:
357
- if os.path.exists(image_path):
358
- try:
359
- os.remove(image_path)
360
- logger.info(f"Successfully cleaned up {image_path}")
361
- except PermissionError as e:
362
- logger.warning(f"Could not remove {image_path} after processing. "
363
- f"File may be locked. Error: {e}")
364
-
365
- elif query: # Only run text logic if there's a query and no image
366
- logger.info("Processing Text RAG query for Islamic domain")
367
- standalone_query = get_standalone_question(query, history,llm)
368
- logger.info(f"Original Query: '{query}'")
369
- print(f"📚 Using chat history with {len(history)} previous messages to create standalone query")
370
- logger.info(f"Standalone Query: '{standalone_query}'")
371
- final_query = standalone_query
372
-
373
- if not final_query:
374
- raise ValueError("No query or file provided.")
375
-
376
- agent = rag_systems['islamic']
377
- if not agent: raise Exception("Islamic RAG system is not loaded.")
378
- response_dict = agent.answer(final_query, chat_history=history)
379
- answer, thoughts , validation, source = parse_agent_response(response_dict)
380
- history.append(HumanMessage(content=query))
381
- history.append(AIMessage(content=answer))
382
-
383
- except Exception as e:
384
- logger.error(f"Error on /islamic page: {e}", exc_info=True)
385
- answer = f"An error occurred: {e}"
386
- thoughts = traceback.format_exc()
387
-
388
- # Save updated history and LATEST RESPONSE DATA back to the session
389
- session['islamic_history'] = history
390
- session['latest_islamic_response'] = {
391
- 'answer': answer,
392
- 'thoughts': thoughts,
393
- 'validation': validation,
394
- 'source': source
395
- }
396
- session.modified = True
397
-
398
- logger.debug(f"Redirecting after saving latest response.")
399
- return redirect(url_for('islamic_page'))
400
-
401
- @app.route("/islamic/clear")
402
- def clear_islamic_chat():
403
- session.pop('islamic_history', None)
404
- logger.info("Islamic chat history cleared.")
405
- return redirect(url_for('islamic_page'))
406
-
407
- @app.route("/insurance", methods=["GET", "POST"])
408
- def insurance_page():
409
- if request.method == "GET" :
410
- latest_response = session.pop('latest_insurance_response',{})
411
-
412
- answer = latest_response.get('answer', "")
413
- thoughts = latest_response.get('thoughts', "")
414
- validation = latest_response.get('validation', "")
415
- source = latest_response.get('source', "")
416
-
417
- if not latest_response and 'insurance_history' not in session:
418
- session.pop('insurance_history', None)
419
-
420
- return render_template("insurance_page.html", # You will need to create this HTML file
421
- history=session.get('insurance_history', []),
422
- answer=answer,
423
- thoughts=thoughts,
424
- validation=validation,
425
- source=source)
426
-
427
- # POST Request Logic
428
- answer, thoughts, validation, source = "", "", "", ""
429
- history = session.get('insurance_history', [])
430
-
431
- try:
432
- query = standardize_query(request.form.get("query", ""))
433
-
434
- if query:
435
- logger.info("Processing Text RAG query for Insurance domain")
436
- standalone_query = get_standalone_question(query, history, llm)
437
- logger.info(f"Original Query: '{query}'")
438
- logger.info(f"Standalone Query: '{standalone_query}'")
439
-
440
- agent = rag_systems['insurance']
441
- if not agent: raise Exception("Insurance RAG system is not loaded.")
442
- response_dict = agent.answer(standalone_query, chat_history=history)
443
- answer, thoughts, validation, source = parse_agent_response(response_dict)
444
-
445
- history.append(HumanMessage(content=query))
446
- history.append(AIMessage(content=answer))
447
- else:
448
- raise ValueError("No query provided.")
449
-
450
- except Exception as e:
451
- logger.error(f"Error on /insurance page: {e}", exc_info=True)
452
- answer = f"An error occurred: {e}"
453
- thoughts = traceback.format_exc()
454
-
455
- session['insurance_history'] = history
456
- session['latest_insurance_response'] = {
457
- 'answer': answer,
458
- 'thoughts': thoughts,
459
- 'validation': validation,
460
- 'source': source
461
- }
462
- session.modified = True
463
-
464
- logger.debug(f"Redirecting after saving latest response.")
465
- return redirect(url_for('insurance_page'))
466
-
467
- @app.route("/insurance/clear")
468
- def clear_insurance_chat():
469
- session.pop('insurance_history', None)
470
- logger.info("Insurance chat history cleared.")
471
- return redirect(url_for('insurance_page'))
472
-
473
- @app.route("/about", methods=["GET"])
474
- def about():
475
- return render_template("about.html")
476
-
477
@app.route('/metrics/<domain>')
def get_metrics(domain):
    """API endpoint to get metrics for a specific domain.

    BUG FIX: an unknown domain previously hit the "not loaded" branch first
    (because rag_systems.get(<unknown>) is falsy) and returned HTTP 500;
    domain validity is now checked before load status, so unknown domains
    correctly get HTTP 400.
    """
    try:
        if domain not in rag_systems:
            return jsonify({"error": "Invalid domain"}), 400
        system = rag_systems[domain]
        if not system:
            return jsonify({"error": f"{domain} RAG system not loaded"}), 500
        return jsonify(system.metrics_tracker.get_stats())
    except Exception as e:
        return jsonify({"error": str(e)}), 500
495
-
496
@app.route('/metrics/reset/<domain>', methods=['POST'])
def reset_metrics(domain):
    """Reset metrics for a domain (useful for testing).

    BUG FIX: unknown domains now return HTTP 400 instead of the HTTP 500
    "not loaded" response -- domain validity is checked before load status
    (mirrors the fix in get_metrics).
    """
    try:
        if domain not in rag_systems:
            return jsonify({"error": "Invalid domain"}), 400
        system = rag_systems[domain]
        if not system:
            return jsonify({"error": f"{domain} RAG system not loaded"}), 500
        system.metrics_tracker.reset_metrics()
        return jsonify({"success": True, "message": f"Metrics reset for {domain}"})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
514
-
515
- if __name__ == "__main__":
516
- logger.info("Starting Flask app for deployment testing...")
517
- # This port 7860 is what Hugging Face Spaces expects by default
518
  app.run(host="0.0.0.0", port=7860, debug=False)
 
1
+ from flask import Flask, request, render_template, session, url_for, redirect, jsonify
2
+ from flask_session import Session
3
+ from langchain_core.messages import HumanMessage, AIMessage
4
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
5
+ import os
6
+ import logging
7
+ import re
8
+ import traceback
9
+ import base64
10
+ import shutil
11
+ import zipfile
12
+ from dotenv import load_dotenv
13
+ from huggingface_hub import hf_hub_download
14
+
15
+ # --- Core Application Imports ---
16
+ # Make sure you have an empty __init__.py file in your 'src' folder
17
+
18
+ from src.medical_swarm import run_medical_swarm
19
+ from src.utils import load_rag_system, standardize_query, get_standalone_question, parse_agent_response, markdown_bold_to_html
20
+ from langchain_google_genai import ChatGoogleGenerativeAI
21
+
22
+ # Setup logging
23
+ logging.basicConfig(level=logging.DEBUG)
24
+ logger = logging.getLogger(__name__)
25
+
26
+ # Load environment variables
27
+ load_dotenv()
28
+
29
# --- 1. DATABASE SETUP FUNCTION (For Deployment) ---
def setup_database():
    """Fetch the zipped ChromaDB folder from a Hugging Face dataset and extract it locally."""
    # --- !!! IMPORTANT !!! ---
    # Change this to your own Hugging Face Dataset repo ID,
    # e.g. "your_username/your_database_repo_name".
    DATASET_REPO_ID = "WanIrfan/atlast-db"
    # -------------------------

    ZIP_FILENAME = "chroma_db.zip"
    DB_DIR = "chroma_db"

    # Nothing to do when a non-empty database directory is already present.
    if os.path.exists(DB_DIR) and os.listdir(DB_DIR):
        logger.info("✅ Database directory already exists. Skipping download.")
        return

    logger.info(f"📥 Downloading database from HF Hub: {DATASET_REPO_ID}")
    try:
        zip_path = hf_hub_download(
            repo_id=DATASET_REPO_ID,
            filename=ZIP_FILENAME,
            repo_type="dataset",
            # A token may be required when the dataset is private:
            # token=os.getenv("HF_TOKEN")
        )

        logger.info(f"📦 Unzipping database from {zip_path}...")
        with zipfile.ZipFile(zip_path, 'r') as archive:
            archive.extractall(".")  # extracts into the working dir, creating ./chroma_db

        logger.info("✅ Database setup complete!")

        # Drop the downloaded archive to save disk space.
        if os.path.exists(zip_path):
            os.remove(zip_path)

    except Exception as e:
        # Not fatal by itself: the RAG loaders below will simply fail to find
        # the database and report their own errors.
        logger.error(f"❌ CRITICAL ERROR setting up database: {e}", exc_info=True)
70
+
71
# --- RUN DATABASE SETUP *BEFORE* INITIALIZING THE APP ---
setup_database()


# --- STANDARD FLASK APP INITIALIZATION ---
app = Flask(__name__)
app.secret_key = os.urandom(24)  # random per-process key used to sign the session

# --- CONFIGURE SERVER-SIDE SESSIONS ---
app.config["SESSION_PERMANENT"] = False
app.config["SESSION_TYPE"] = "filesystem"
Session(app)

google_api_key = os.getenv("GOOGLE_API_KEY")
if google_api_key:
    logger.info("GOOGLE_API_KEY loaded successfully.")
else:
    logger.warning("⚠️ GOOGLE_API_KEY not found in environment variables. LLM calls will fail.")

# Initialize LLM
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0.05, google_api_key=google_api_key)

# --- LOAD RAG SYSTEMS (AFTER DB SETUP) ---
logger.info("🌟 Starting Multi-Domain AI Assistant...")
try:
    rag_systems = {
        'medical': load_rag_system(collection_name="medical_csv_Agentic_retrieval", domain="medical"),
        'islamic': load_rag_system(collection_name="islamic_texts_Agentic_retrieval", domain="islamic"),
        'insurance': load_rag_system(collection_name="etiqa_Agentic_retrieval", domain="insurance"),
    }
except Exception as e:
    logger.error(f"❌ FAILED to load RAG systems. Check database path and permissions. Error: {e}", exc_info=True)
    rag_systems = {'medical': None, 'islamic': None, 'insurance': None}

# Expose the shared systems and LLM to blueprints via the app object.
app.rag_systems = rag_systems
app.llm = llm


# Check initialization status
logger.info("\n📊 SYSTEM STATUS:")
for domain, system in rag_systems.items():
    status = "✅ Ready" if system else "❌ Failed (DB missing?)"
    logger.info(f" {domain}: {status}")
115
+
116
+
117
+ # --- FLASK ROUTES ---
118
+
119
+ @app.route("/")
120
+ def homePage():
121
+ # Clear all session history when visiting the home page
122
+ session.pop('medical_history', None)
123
+ session.pop('islamic_history', None)
124
+ session.pop('insurance_history', None)
125
+ session.pop('current_medical_document', None)
126
+ return render_template("homePage.html")
127
+
128
+
129
+ @app.route("/medical", methods=["GET", "POST"])
130
+ def medical_page():
131
+ # Use session for history and document context
132
+ if request.method == "GET":
133
+ # Load all latest data from session (or default to empty if not found)
134
+ latest_response = session.pop('latest_medical_response', {}) # POP to clear it after one display
135
+
136
+ answer = latest_response.get('answer', "")
137
+ thoughts = latest_response.get('thoughts', "")
138
+ validation = latest_response.get('validation', "")
139
+ source = latest_response.get('source', "")
140
+
141
+ # Clear history only when a user first navigates (not on redirect)
142
+ if not latest_response and 'medical_history' not in session:
143
+ session.pop('current_medical_document', None)
144
+
145
+ return render_template("medical_page.html",
146
+ history=session.get('medical_history', []),
147
+ answer=answer,
148
+ thoughts=thoughts,
149
+ validation=validation,
150
+ source=source)
151
+
152
+ # POST Request Logic
153
+ answer, thoughts, validation, source = "", "", "", ""
154
+ history = session.get('medical_history', [])
155
+ current_medical_document = session.get('current_medical_document', "")
156
+
157
+
158
+ try:
159
+ query=standardize_query(request.form.get("query", ""))
160
+ has_image = 'image' in request.files and request.files['image'].filename
161
+ has_document = 'document' in request.files and request.files['document'].filename
162
+ has_query = request.form.get("query") or request.form.get("question", "")
163
+
164
+ logger.info(f"POST request received: has_image={has_image}, has_document={has_document}, has_query={has_query}")
165
+
166
+ if has_document:
167
+ # Scenario 3: Query + Document
168
+ logger.info("Processing Scenario 3: Query + Document with Medical Swarm")
169
+ file = request.files['document']
170
+ try:
171
+ # Store the new document text in the session
172
+ document_text = file.read().decode("utf-8")
173
+ session['current_medical_document'] = document_text
174
+ current_medical_document = document_text # Use the new document for this turn
175
+ except UnicodeDecodeError:
176
+ answer = "Error: Could not decode the uploaded document. Please ensure it is a valid text or PDF file."
177
+ logger.error("Scenario 3: Document decode error")
178
+ thoughts = traceback.format_exc()
179
+
180
+ swarm_answer = run_medical_swarm(current_medical_document, query)
181
+ answer = markdown_bold_to_html(swarm_answer)
182
+
183
+ history.append(HumanMessage(content=f"[Document Uploaded] Query: '{query}'"))
184
+ history.append(AIMessage(content=swarm_answer))
185
+ thoughts = "Swarm analysis complete. The process is orchestrated and does not use the ReAct thought process. You can now ask follow-up questions."
186
+ source= "Medical Swarm"
187
+ validation = (True, "Swarm output generated.") # Swarm has its own validation logic
188
+
189
+ elif has_image :
190
+ #Scenario 1
191
+ logger.info("Processing Multimodal RAG: Query + Image")
192
+ # --- Step 1 & 2: Image Setup & Vision Analysis ---
193
+ file = request.files['image']
194
+ upload_dir = "Uploads"
195
+ os.makedirs(upload_dir, exist_ok=True)
196
+ image_path = os.path.join(upload_dir, file.filename)
197
+
198
+ try:
199
+ file.save(image_path)
200
+ file.close()
201
+
202
+ with open(image_path, "rb") as img_file:
203
+ img_data = base64.b64encode(img_file.read()).decode("utf-8")
204
+
205
+
206
+ vision_prompt = f"Analyze this image and identify the main subject in a single, concise sentence. The user's query is: '{query}'"
207
+ message = HumanMessage(content=[
208
+ {"type": "text", "text": vision_prompt},
209
+ {"type": "image_url", "image_url": f"data:image/jpeg;base64,{img_data}"}
210
+ ])
211
+ vision_response = llm.invoke([message])
212
+ visual_prediction = vision_response.content
213
+ logger.info(f"Vision Prediction: {visual_prediction}")
214
+
215
+ # --- Create an Enhanced Query ---
216
+ enhanced_query = (
217
+ f'User Query: "{query}" '
218
+ f'Context from an image provided by the LLM: "{visual_prediction}" '
219
+ 'Based on the user\'s query and the context from LLM, provide a comprehensive answer.'
220
+ )
221
+ logger.info(f"Enhanced query : {enhanced_query}")
222
+
223
+ agent = rag_systems['medical']
224
+ if not agent: raise Exception("Medical RAG system is not loaded.")
225
+ response_dict = agent.answer(enhanced_query, chat_history=history)
226
+ answer, thoughts, validation, source = parse_agent_response(response_dict)
227
+ history.append(HumanMessage(content=query))
228
+ history.append(AIMessage(content=answer))
229
+
230
+ finally:
231
+ if os.path.exists(image_path):
232
+ try:
233
+ os.remove(image_path)
234
+ logger.info(f"Successfully deleted temporary image file: {image_path}")
235
+ except PermissionError as e:
236
+ logger.warning(f"Could not remove {image_path} after processing. "
237
+ f"File may be locked by another process. Error: {e}")
238
+
239
+ elif query:
240
+ # --- SCENARIO 2: TEXT-ONLY QUERY OR SWARM FOLLOW-UP ---
241
+ history_for_agent = history
242
+ if current_medical_document:
243
+ logger.info("Processing Follow-up Query for Document")
244
+ history_for_agent = [HumanMessage(content=f"We are discussing this document:\n{current_medical_document}")] + history
245
+ else:
246
+ logger.info("Processing Text RAG query for Medical domain")
247
+
248
+ logger.info(f"Original Query: '{query}'")
249
+ print(f"📚 Using chat history with {len(history)} previous messages to create standalone query")
250
+ standalone_query = get_standalone_question(query, history_for_agent,llm)
251
+ logger.info(f"Standalone Query: '{standalone_query}'")
252
+
253
+ agent = rag_systems['medical']
254
+ if not agent: raise Exception("Medical RAG system is not loaded.")
255
+ response_dict = agent.answer(standalone_query, chat_history=history_for_agent)
256
+ answer, thoughts, validation, source = parse_agent_response(response_dict)
257
+
258
+ history.append(HumanMessage(content=query))
259
+ history.append(AIMessage(content=answer))
260
+
261
+ else:
262
+ raise ValueError("No query or file provided.")
263
+ except Exception as e:
264
+ logger.error(f"Error on /medical page: {e}", exc_info=True)
265
+ answer = f"An error occurred: {e}"
266
+ thoughts = traceback.format_exc()
267
+
268
+ # Save updated history and LATEST RESPONSE DATA back to the session
269
+ session['medical_history'] = history
270
+ session['latest_medical_response'] = {
271
+ 'answer': answer,
272
+ 'thoughts': thoughts,
273
+ 'validation': validation,
274
+ 'source': source
275
+ }
276
+ session.modified = True
277
+
278
+ logger.debug(f"Redirecting after saving latest response.")
279
+ return redirect(url_for('medical_page'))
280
+
281
+ @app.route("/medical/clear")
282
+ def clear_medical_chat():
283
+ session.pop('medical_history', None)
284
+ session.pop('current_medical_document', None)
285
+ logger.info("Medical chat history cleared.")
286
+ return redirect(url_for('medical_page'))
287
+
288
+ @app.route("/islamic", methods=["GET", "POST"])
289
+ def islamic_page():
290
+ #Use session
291
+
292
+ if request.method == "GET":
293
+ # Load all latest data from session (or default to empty if not found)
294
+ latest_response = session.pop('latest_islamic_response', {}) # POP to clear it after one display
295
+
296
+ answer = latest_response.get('answer', "")
297
+ thoughts = latest_response.get('thoughts', "")
298
+ validation = latest_response.get('validation', "")
299
+ source = latest_response.get('source', "")
300
+
301
+ # Clear history only when a user first navigates (no latest_response and no current history)
302
+ if not latest_response and 'islamic_history' not in session:
303
+ session.pop('islamic_history', None)
304
+
305
+ return render_template("islamic_page.html",
306
+ history=session.get('islamic_history', []),
307
+ answer=answer,
308
+ thoughts=thoughts,
309
+ validation=validation,
310
+ source=source)
311
+
312
+ # POST Request Logic
313
+ answer, thoughts, validation, source = "", "", "", ""
314
+ history = session.get('islamic_history', [])
315
+
316
+ # This try/except block wraps the ENTIRE POST logic
317
+ try:
318
+ query = standardize_query(request.form.get("query", ""))
319
+ has_image = 'image' in request.files and request.files['image'].filename
320
+
321
+ final_query = query # Default to the original query
322
+
323
+ if has_image:
324
+ logger.info("Processing Multimodal RAG query for Islamic domain")
325
+
326
+ file = request.files['image']
327
+
328
+ upload_dir = "Uploads"
329
+ os.makedirs(upload_dir, exist_ok=True)
330
+ image_path = os.path.join(upload_dir, file.filename)
331
+
332
+ try:
333
+ file.save(image_path)
334
+ file.close()
335
+
336
+ with open(image_path, "rb") as img_file:
337
+ img_base64 = base64.b64encode(img_file.read()).decode("utf-8")
338
+
339
+ vision_prompt = f"Analyze this image's main subject. User's query is: '{query}'"
340
+ message = HumanMessage(content=[{"type": "text", "text": vision_prompt}, {"type": "image_url", "image_url": f"data:image/jpeg;base64,{img_base64}"}])
341
+ visual_prediction = llm.invoke([message]).content
342
+
343
+ enhanced_query = (
344
+ f'User Query: "{query}" '
345
+ f'Context from an image provided by the LLM: "{visual_prediction}" '
346
+ 'Based on the user\'s query and the context from LLM, provide a comprehensive answer.'
347
+ )
348
+ logger.info(f"Create enchanced query : {enhanced_query}")
349
+
350
+ final_query = enhanced_query
351
+
352
+ finally:
353
+ if os.path.exists(image_path):
354
+ try:
355
+ os.remove(image_path)
356
+ logger.info(f"Successfully cleaned up {image_path}")
357
+ except PermissionError as e:
358
+ logger.warning(f"Could not remove {image_path} after processing. "
359
+ f"File may be locked. Error: {e}")
360
+
361
+ elif query: # Only run text logic if there's a query and no image
362
+ logger.info("Processing Text RAG query for Islamic domain")
363
+ standalone_query = get_standalone_question(query, history,llm)
364
+ logger.info(f"Original Query: '{query}'")
365
+ print(f"📚 Using chat history with {len(history)} previous messages to create standalone query")
366
+ logger.info(f"Standalone Query: '{standalone_query}'")
367
+ final_query = standalone_query
368
+
369
+ if not final_query:
370
+ raise ValueError("No query or file provided.")
371
+
372
+ agent = rag_systems['islamic']
373
+ if not agent: raise Exception("Islamic RAG system is not loaded.")
374
+ response_dict = agent.answer(final_query, chat_history=history)
375
+ answer, thoughts , validation, source = parse_agent_response(response_dict)
376
+ history.append(HumanMessage(content=query))
377
+ history.append(AIMessage(content=answer))
378
+
379
+ except Exception as e:
380
+ logger.error(f"Error on /islamic page: {e}", exc_info=True)
381
+ answer = f"An error occurred: {e}"
382
+ thoughts = traceback.format_exc()
383
+
384
+ # Save updated history and LATEST RESPONSE DATA back to the session
385
+ session['islamic_history'] = history
386
+ session['latest_islamic_response'] = {
387
+ 'answer': answer,
388
+ 'thoughts': thoughts,
389
+ 'validation': validation,
390
+ 'source': source
391
+ }
392
+ session.modified = True
393
+
394
+ logger.debug(f"Redirecting after saving latest response.")
395
+ return redirect(url_for('islamic_page'))
396
+
397
+ @app.route("/islamic/clear")
398
+ def clear_islamic_chat():
399
+ session.pop('islamic_history', None)
400
+ logger.info("Islamic chat history cleared.")
401
+ return redirect(url_for('islamic_page'))
402
+
403
+ @app.route("/insurance", methods=["GET", "POST"])
404
+ def insurance_page():
405
+ if request.method == "GET" :
406
+ latest_response = session.pop('latest_insurance_response',{})
407
+
408
+ answer = latest_response.get('answer', "")
409
+ thoughts = latest_response.get('thoughts', "")
410
+ validation = latest_response.get('validation', "")
411
+ source = latest_response.get('source', "")
412
+
413
+ if not latest_response and 'insurance_history' not in session:
414
+ session.pop('insurance_history', None)
415
+
416
+ return render_template("insurance_page.html", # You will need to create this HTML file
417
+ history=session.get('insurance_history', []),
418
+ answer=answer,
419
+ thoughts=thoughts,
420
+ validation=validation,
421
+ source=source)
422
+
423
+ # POST Request Logic
424
+ answer, thoughts, validation, source = "", "", "", ""
425
+ history = session.get('insurance_history', [])
426
+
427
+ try:
428
+ query = standardize_query(request.form.get("query", ""))
429
+
430
+ if query:
431
+ logger.info("Processing Text RAG query for Insurance domain")
432
+ standalone_query = get_standalone_question(query, history, llm)
433
+ logger.info(f"Original Query: '{query}'")
434
+ logger.info(f"Standalone Query: '{standalone_query}'")
435
+
436
+ agent = rag_systems['insurance']
437
+ if not agent: raise Exception("Insurance RAG system is not loaded.")
438
+ response_dict = agent.answer(standalone_query, chat_history=history)
439
+ answer, thoughts, validation, source = parse_agent_response(response_dict)
440
+
441
+ history.append(HumanMessage(content=query))
442
+ history.append(AIMessage(content=answer))
443
+ else:
444
+ raise ValueError("No query provided.")
445
+
446
+ except Exception as e:
447
+ logger.error(f"Error on /insurance page: {e}", exc_info=True)
448
+ answer = f"An error occurred: {e}"
449
+ thoughts = traceback.format_exc()
450
+
451
+ session['insurance_history'] = history
452
+ session['latest_insurance_response'] = {
453
+ 'answer': answer,
454
+ 'thoughts': thoughts,
455
+ 'validation': validation,
456
+ 'source': source
457
+ }
458
+ session.modified = True
459
+
460
+ logger.debug(f"Redirecting after saving latest response.")
461
+ return redirect(url_for('insurance_page'))
462
+
463
+ @app.route("/insurance/clear")
464
+ def clear_insurance_chat():
465
+ session.pop('insurance_history', None)
466
+ logger.info("Insurance chat history cleared.")
467
+ return redirect(url_for('insurance_page'))
468
+
469
+ @app.route("/about", methods=["GET"])
470
+ def about():
471
+ return render_template("about.html")
472
+
473
@app.route('/metrics/<domain>')
def get_metrics(domain):
    """API endpoint to get metrics for a specific domain.

    Returns the domain's metrics-tracker stats as JSON, 400 for an unknown
    domain, 500 if the domain's RAG system failed to load.
    """
    try:
        # FIX: validate the domain name FIRST. Previously an unknown domain
        # (e.g. "foo") fell into the `not rag_systems.get(domain)` branch and
        # returned a misleading 500 "foo RAG system not loaded" instead of
        # 400 "Invalid domain".
        if domain not in ('medical', 'islamic', 'insurance'):
            return jsonify({"error": "Invalid domain"}), 400

        agent = rag_systems.get(domain)
        if not agent:
            return jsonify({"error": f"{domain} RAG system not loaded"}), 500

        return jsonify(agent.metrics_tracker.get_stats())
    except Exception as e:
        return jsonify({"error": str(e)}), 500
491
+
492
@app.route('/metrics/reset/<domain>', methods=['POST'])
def reset_metrics(domain):
    """Reset metrics for a domain (useful for testing).

    Returns a success payload, 400 for an unknown domain, 500 if the
    domain's RAG system failed to load.
    """
    try:
        # FIX: validate the domain name FIRST. Previously an unknown domain
        # hit the `not rag_systems.get(domain)` branch and returned a
        # misleading 500 "not loaded" instead of 400 "Invalid domain".
        if domain not in ('medical', 'islamic', 'insurance'):
            return jsonify({"error": "Invalid domain"}), 400

        agent = rag_systems.get(domain)
        if not agent:
            return jsonify({"error": f"{domain} RAG system not loaded"}), 500

        agent.metrics_tracker.reset_metrics()
        return jsonify({"success": True, "message": f"Metrics reset for {domain}"})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
510
+
511
+ if __name__ == "__main__":
512
+ logger.info("Starting Flask app for deployment testing...")
513
+ # This port 7860 is what Hugging Face Spaces expects by default
 
 
 
 
514
  app.run(host="0.0.0.0", port=7860, debug=False)