prthm11 committed on
Commit
8a5a9dd
·
verified ·
1 Parent(s): b947639

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +631 -630
app.py CHANGED
@@ -1,630 +1,631 @@
1
- # --- IMPORTS ---
2
- from werkzeug.exceptions import TooManyRequests
3
- from flask import Flask, request, jsonify, render_template
4
- from flask_socketio import SocketIO, emit
5
- from langchain_google_genai import ChatGoogleGenerativeAI
6
- from langchain.agents import initialize_agent, AgentType, create_react_agent, AgentExecutor
7
- from langchain_community.agent_toolkits import create_sql_agent, SQLDatabaseToolkit
8
- from langchain_community.utilities import SQLDatabase
9
- from langchain.tools import Tool
10
- from langchain.memory import ConversationBufferMemory
11
- from pymongo import MongoClient
12
- import threading
13
- import os, uuid
14
- import re
15
- import traceback
16
- import ast
17
- from bson import json_util
18
- from dotenv import load_dotenv
19
- from werkzeug.utils import secure_filename
20
- from werkzeug.exceptions import HTTPException
21
- from langchain.prompts import ChatPromptTemplate
22
- from tabulate import tabulate
23
- from fuzzywuzzy import fuzz
24
- # from langchain_groq import ChatGroq
25
- from datetime import datetime
26
-
27
def error_safe(f):
    """Wrap a view function so that every exception becomes a JSON error response.

    ``HTTPException`` instances are reported with their own description and
    status code. Any other exception is printed with its traceback and
    reported as a 500 whose message is ``str(exception)``.

    Args:
        f: The callable to protect.

    Returns:
        A wrapper that carries the metadata of ``f`` (name, docstring, module,
        ``__wrapped__``).
    """
    # Imported locally so the block does not depend on a file-level import.
    from functools import wraps

    # wraps() copies __name__ (which Flask uses as the endpoint name) as well
    # as __doc__, __module__ and __qualname__, instead of __name__ alone.
    @wraps(f)
    def wrapper(*args, **kwargs):
        try:
            return f(*args, **kwargs)
        except HTTPException as he:
            return jsonify({"status": "error", "message": he.description}), he.code
        except Exception as e:
            print("[ERROR] Uncaught Exception in", f.__name__)
            traceback.print_exc()
            return jsonify({"status": "error", "message": str(e)}), 500

    return wrapper
39
-
40
-
41
# --- ENV + FLASK SETUP ---
load_dotenv()

# os.environ only accepts str values, so assigning the result of os.getenv()
# directly raises an opaque TypeError when the key is missing. Fail with an
# explicit message instead.
_gemini_api_key = os.getenv("GEMINI_API_KEY")
if _gemini_api_key is None:
    raise RuntimeError(
        "GEMINI_API_KEY is not set; define it in the environment or in the .env file."
    )
os.environ["GEMINI_API_KEY"] = _gemini_api_key

app = Flask(__name__)
# A fresh random secret is generated on every start.
app.config['SECRET_KEY'] = os.urandom(32)
app.config['UPLOAD_FOLDER'] = 'uploads'
socketio = SocketIO(app, cors_allowed_origins="*")
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

llm = ChatGoogleGenerativeAI(
    temperature=0.2,
    model="gemini-2.0-flash",
    max_retries=50,
    api_key=_gemini_api_key,
)
# llm = ChatGroq(temperature=0.2, model_name="mistral-saba-24b",api_key=os.getenv("GROQ_API_KEY"))

# --- GLOBALS ---
# Rebound by init_sql_agent() / init_mongo_agent() when a database is uploaded.
agent_executor = None
memory = ConversationBufferMemory(
    memory_key="chat_history", return_messages=True, input_key="input")
mongo_db = None
client = None
db_mode = None  # "mongo" or "sql"
66
-
67
- # --- SHARED ---
68
-
69
-
70
def is_schema_request(prompt: str) -> bool:
    """Tell whether ``prompt`` contains a schema/structure keyword.

    Matching is case-insensitive and anchored on word boundaries, so
    "show" matches but "showing" does not.
    """
    keyword_regex = r'\b(schema|table names|tables|columns|structure|column names|collections?|field names|metadata|describe|show)\b'
    found = re.search(keyword_regex, prompt, flags=re.IGNORECASE)
    return found is not None
74
-
75
-
76
def is_sensitive_request(prompt: str) -> bool:
    """Tell whether ``prompt`` contains any sensitive keyword.

    The check is a case-insensitive substring test, so a keyword also
    matches when it appears inside a longer word.
    """
    text = prompt.lower()
    watched_terms = (
        "password", "token", "credential", "secret", "api key", "schema", "structure",
        "collection name", "field name", "user_id", "order_id", "payment_id",
        "internal", "database structure", "table structure", "email", "phone", "contact", "ssn",
    )
    for term in watched_terms:
        if term in text:
            return True
    return False
84
-
85
-
86
# Prompt + model chain that asks the LLM to classify a user prompt as a
# schema/structure/sensitive-information request (YES) or not (NO).
intent_prompt = ChatPromptTemplate.from_messages([
    ("system", "Classify if the user is asking schema/structure/sensitive info (tables, columns, schema): YES or NO."),
    ("human", "{prompt}")
])
intent_checker = intent_prompt | llm


def is_schema_leak_request(prompt):
    """Ask the LLM whether ``prompt`` is a schema/structure/sensitive-info request.

    Args:
        prompt: The raw user prompt.

    Returns:
        True when the classifier's reply contains "yes" (case-insensitive).
        False otherwise, including when the classification call fails.
    """
    try:
        classification = intent_checker.invoke({"prompt": prompt})
        return "yes" in classification.content.strip().lower()
    except Exception:
        # A bare ``except:`` would also swallow KeyboardInterrupt/SystemExit.
        # Keep the "treat as not a leak" fallback, but leave a trace so that
        # classifier outages are visible.
        traceback.print_exc()
        return False
99
-
100
- # --- INIT SQL AGENT ---
101
def init_sql_agent(db_path):
    """Build the SQL agent for the SQLite file at ``db_path``.

    Rebinds the module-level ``agent_executor`` to the new agent and sets
    ``db_mode`` to ``"sql"``.
    """
    global agent_executor, db_mode

    agent_prefix = '''You are a helpful SQL expert agent that ALWAYS returns natural language answers using the tools.'''
    # NOTE: a longer rule set used to follow this sentence and is currently
    # disabled. It asked for Markdown formatting, forbade showing SQL, table
    # or column names, restricted the agent to SELECT-style reads, and gave
    # fixed refusal / "no data" messages. Only the sentence above is sent.

    sqlite_db = SQLDatabase.from_uri(f"sqlite:///{db_path}")
    sql_toolkit = SQLDatabaseToolkit(db=sqlite_db, llm=llm)

    agent_executor = create_sql_agent(
        llm=llm,
        toolkit=sql_toolkit,
        verbose=True,
        prefix=agent_prefix,
        agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        memory=memory,
        agent_executor_kwargs={"handle_parsing_errors": True},
    )
    db_mode = "sql"
163
-
164
- # --- INIT MONGO AGENT ---
165
- system_message = """
166
- You are **MongoDBQueryBot**, a highly intelligent and accurate assistant for answering questions about data stored in a MongoDB database using tools.
167
- """
168
-
169
- # ### 🚨 Critical Instructions (Strictly Follow These):
170
- # - You **must always** use tools provided to answer user questions.
171
- # - Always join IDs with associated human-readable values like names or titles when answering.
172
- # - Prefer displaying `user name`, `employee name`, or `product name` instead of internal IDs like `user_id`, `emp_id`, or `product_id`.
173
- # - Avoid responding only with technical identifiers. Make responses meaningful to users.
174
- # - **Never** guess or fabricate any information.
175
- # - **Do not** show raw JSON, field names, or database structure.
176
- # - Your role is **read-only**: do not suggest or perform insert/update/delete.
177
- # - After using all the available tools, if you are unable to find any documents, then give the following ANSWER:
178
- # "Please, rephrase your query because I can't exactly understand, what you want !"
179
- # - If a query can't be answered or is unrelated to reading data, reply:
180
- # ❌ "I'm only allowed to retrieve data. Please ask a query involving reading information."
181
- # - IF USER ASK ABOUT DATA, Which is not there in a database, then GIVE FOLLOWING ANSWER:
182
- # "There is no such data in the Database."
183
- # - When returning answers:
184
- # - Do **not return internal IDs** like `user_id`, `order_id`, `payment_id`, etc.
185
- # - Instead, use human-readable fields like `name`, `full_name`, `user_name`, etc., from related collections.
186
- # - If only an ID is available, try joining the relevant collections to fetch the proper display name.
187
-
188
- # ### 🧠 How to Think:
189
- # - Understand **exactly** what the user is trying to ask. Do not answer if unclear — ask for clarification.
190
- # - Translate the user prompt into tool inputs by identifying:
191
- # - Which collection to search
192
- # - What value or field they're referring to
193
- # - The correct format expected by the tool
194
-
195
- # ### 🛠️ Tool Usage Guide:
196
- # - Use `FindDocuments` for queries like:
197
- # - "Show me all employees named John"
198
- # - "What is the salary of Manager X?"
199
- # - Use `ListCollections` to discover available data types (but don’t share them directly).
200
- # - **IMPORTANT: Don't iterate on only one tool; if you are not able to answer using the current tool, then switch to another tool!**
201
- # - Use `JoinCollections` to resolve IDs into names when the question asks about people, customers, or products.
202
- # - When resolving names from payments, use this format:
203
- # `from=Payments, key=order_id, to=Orders, match=order_id, next_key=user_id, next_to=Users, next_match=user_id, return=name`
204
-
205
- # - Your goal is to **return the person's name** (e.g., `name`, `user_name`, `full_name`) not their ID.
206
- # - Always prioritize returning names instead of internal identifiers.
207
- # - Examples:
208
- # - For payment-related questions → Join Payments → Orders → Users and return name
209
- # - For order questions → Join Orders → Users and return user names
210
-
211
- # ### 🧾 Response Format:
212
- # - Use **clear markdown with tables** when displaying data.
213
- # - If no data is found: return `**No documents found.**`
214
- # - Stay professional, brief, and relevant.
215
-
216
- # ### 🚫 Never Do This:
217
- # - Do not leak MongoDB structure, schema, or field names.
218
- # - Do not suggest code, MongoDB syntax, or field mappings.
219
- # - Do not hallucinate or make assumptions.
220
-
221
- # Start by analyzing the prompt carefully, select the right tool, invoke it, and return a user-friendly answer based on the result.
222
- # """
223
-
224
-
225
def find_docs_tool_func(query: str) -> str:
    """Search MongoDB for documents, falling back to the other collections.

    The named collection is searched first. If it yields nothing, every
    other collection is tried in turn and the first non-empty result wins.

    Input format:
    - collection=<collection>, key=<field>, value=<value>
    - OR: collection=<collection>, value=<value>

    Returns a GitHub-style table of the matches, a "no documents" message,
    or an error message when the input cannot be processed.
    """
    def _as_table(rows):
        return "\n markdown\n" + tabulate(rows, headers="keys", tablefmt="github") + "\n"

    try:
        fields = {}
        for chunk in query.split(","):
            if "=" in chunk:
                name, raw_value = chunk.strip().split("=", 1)
                fields[name] = raw_value

        collection = fields.get("collection")
        key = fields.get("key")
        value = fields.get("value")
        if not collection:
            return "❌ 'collection' is required."

        def _search(coll_name):
            # key + value: exact field match; value only: case-insensitive
            # match against any field; neither: every document.
            source = mongo_db[coll_name]
            if key and value:
                return list(source.find({key: value}, {'_id': 0}))
            if value:
                return [
                    doc for doc in source.find({}, {'_id': 0})
                    if any(str(v).lower() == value.lower() for v in doc.values())
                ]
            return list(source.find({}, {'_id': 0}))

        rows = _search(collection)
        if not rows:
            for other in mongo_db.list_collection_names():
                if other != collection:
                    rows = _search(other)
                    if rows:
                        break

        if rows:
            return _as_table(rows)
        return "**No documents found.**"
    except Exception as exc:
        return f"Invalid input format or error: {str(exc)}"
265
-
266
-
267
def aggregate_group_by(_input: str):
    """Count the documents of a collection grouped by one field.

    Accepted inputs:
    - a dict literal, e.g. ``{"collection": "residents", "field": "gender"}``
      (``collection_name`` and ``group_by`` are accepted as aliases);
    - the legacy form ``collection=<name>, field=<group_by_field>``.

    Returns a GitHub-style table with one row per distinct value and its
    count, ``**No data found.**`` when the aggregation is empty, or an
    ``Aggregation failed: ...`` message on any error.
    """
    try:
        text = _input.strip()
        if text.startswith("{"):
            # literal_eval only evaluates Python literals, never arbitrary code.
            args = ast.literal_eval(text)
            collection = args.get("collection_name") or args.get("collection")
            field = args.get("group_by") or args.get("field")
        else:
            # Strip each key and value: the documented format puts a space
            # after the comma, which previously produced the key " field".
            args = {}
            for part in text.split(","):
                if "=" in part:
                    name, value = part.split("=", 1)
                    args[name.strip()] = value.strip()
            collection = args.get("collection")
            field = args.get("field")

        # Without this check a missing field would silently group on "$None".
        if not collection or not field:
            return "Aggregation failed: both 'collection' and 'field' are required."

        pipeline = [
            {"$group": {"_id": f"${field}", "count": {"$sum": 1}}},
            {"$project": {"_id": 0, field: "$_id", "count": 1}}
        ]
        result = list(mongo_db[collection].aggregate(pipeline))
        if not result:
            return "**No data found.**"
        return "\n markdown\n" + tabulate(result, headers="keys", tablefmt="github") + "\n"
    except Exception as e:
        return f"Aggregation failed: {e}"
290
-
291
-
292
def get_all_documents(collection: str):
    """Return every document of ``collection`` (without ``_id``) as a table.

    Yields ``**No documents found.**`` for an empty collection and an
    ``Error fetching documents: ...`` message when anything raises.
    """
    try:
        records = list(mongo_db[collection].find({}, {'_id': 0}))
        if records:
            table = tabulate(records, headers="keys", tablefmt="github")
            return "\n markdown\n" + table + "\n"
        return "**No documents found.**"
    except Exception as exc:
        return f"Error fetching documents: {exc}"
300
-
301
-
302
def fuzzy_find_documents(query: str):
    """Fuzzy-match a value against every field of every document in a collection.

    Input format: ``collection=<name>, value=<search_term>, threshold=80``
    (``threshold`` is optional and defaults to 80). A document matches when
    the partial ratio of any of its values reaches the threshold.
    """
    try:
        options = dict(piece.strip().split("=", 1) for piece in query.split(","))
        coll_name = options["collection"]
        needle = options["value"]
        min_score = int(options.get("threshold", 80))

        def _is_close(document):
            return any(
                fuzz.partial_ratio(str(field_value).lower(), needle.lower()) >= min_score
                for field_value in document.values()
            )

        hits = [
            document
            for document in mongo_db[coll_name].find({}, {'_id': 0})
            if _is_close(document)
        ]
        if hits:
            return "\n markdown\n" + tabulate(hits, headers="keys", tablefmt="github") + "\n"
        return "**No fuzzy matches found.**"
    except Exception as exc:
        return f"Fuzzy match error: {exc}"
318
-
319
- # def join_collections_tool_func(_input: str):
320
- # try:
321
- # # Parse input like: from=Products, key=category_id, to=Categories, match=category_id, return=category_name
322
- # args = dict(x.strip().split("=", 1) for x in _input.split(","))
323
- # from_collection = args["from"]
324
- # foreign_key = args["key"]
325
- # to_collection = args["to"]
326
- # match_key = args["match"]
327
- # return_field = args["return"]
328
-
329
- # results = []
330
- # foreign_lookup = {
331
- # doc[match_key]: doc.get(return_field)
332
- # for doc in mongo_db[to_collection].find()
333
- # if match_key in doc
334
- # }
335
-
336
- # for doc in mongo_db[from_collection].find({}, {'_id': 0}):
337
- # doc[return_field] = foreign_lookup.get(doc.get(foreign_key), "Unknown")
338
- # results.append(doc)
339
-
340
- # if not results:
341
- # return "**No documents found.**"
342
-
343
- # return "\n markdown\n" + tabulate(results, headers="keys", tablefmt="github") + "\n"
344
-
345
- # except Exception as e:
346
- # return f"Join failed: {e}"
347
-
348
-
349
def join_collections_tool_func(_input: str):
    """Join collections in one or two hops and return a single field.

    Input formats:
    - from=Payments, key=order_id, to=Orders, match=order_id, next_key=user_id, next_to=Users, next_match=user_id, return=name
    - from=Products, key=category_id, to=Categories, match=category_id, return=category_name

    Source documents without a partner in the first hop are dropped. In the
    second hop an unresolved row gets the value "Unknown".
    """
    try:
        params = dict(token.strip().split("=", 1) for token in _input.split(","))
        source_name = params["from"]
        local_key = params["key"]
        target_name = params["to"]
        target_key = params["match"]
        return_field = params["return"]

        hop_key = params.get("next_key")
        hop_target = params.get("next_to")
        hop_match = params.get("next_match")

        # First hop: index the target collection by its match field.
        first_lookup = {}
        for target_doc in mongo_db[target_name].find():
            if target_key in target_doc:
                first_lookup[target_doc[target_key]] = target_doc

        joined = []
        for source_doc in mongo_db[source_name].find({}, {'_id': 0}):
            partner = first_lookup.get(source_doc.get(local_key))
            if partner:
                # Target fields override source fields of the same name.
                joined.append({**source_doc, **partner})

        # Second hop: only when all three next_* arguments are given.
        if hop_key and hop_target and hop_match:
            second_lookup = {
                d[hop_match]: d for d in mongo_db[hop_target].find() if hop_match in d
            }
            for row in joined:
                resolved = second_lookup.get(row.get(hop_key))
                row[return_field] = (
                    resolved.get(return_field, "Unknown") if resolved else "Unknown"
                )

        if not joined:
            return "**No documents found.**"

        projected = [
            {return_field: row.get(return_field)} for row in joined if return_field in row
        ]
        return "\n markdown\n" + tabulate(projected, headers="keys", tablefmt="github") + "\n"

    except Exception as exc:
        return f"Join failed: {exc}"
399
-
400
-
401
def smart_join_router(prompt: str) -> str:
    """Suggest a ready-made JoinCollections input string for common questions.

    Payment questions about a person map to Payments -> Orders -> Users.
    Order questions that mention "name" map to Orders -> Users. Anything
    else yields a message asking for more context.
    """
    text = prompt.lower()
    person_terms = ("who", "name", "user", "person")
    mentions_person = any(term in text for term in person_terms)

    if "payment" in text and mentions_person:
        return "from=Payments, key=order_id, to=Orders, match=order_id, next_key=user_id, next_to=Users, next_match=user_id, return=name"
    if "order" in text and "name" in text:
        return "from=Orders, key=user_id, to=Users, match=user_id, return=name"
    # Extend with further routes as needed.
    return "Unable to auto-generate join path. Please provide more context."
413
-
414
-
415
def init_mongo_agent(json_path):
    """Load a JSON export into MongoDB and build the Mongo agent.

    The file may contain a list (loaded into ``default_collection``) or a
    dict mapping collection names to a list of documents or to a single
    document. Each target collection is dropped before it is refilled.

    Rebinds the module-level ``client``, ``mongo_db`` and ``agent_executor``
    and sets ``db_mode`` to ``"mongo"``.

    Args:
        json_path: Path of the uploaded JSON file.

    Raises:
        ValueError: If the top-level JSON value is neither a list nor a dict.
    """
    global agent_executor, client, mongo_db, db_mode

    # Parse and validate before touching the database, so a malformed upload
    # cannot leave a connection open or collections half replaced.
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json_util.loads(f.read())
    if not isinstance(data, (list, dict)):
        raise ValueError("Unsupported JSON format. Must be a list or dict.")

    # Close the client left over from a previous upload instead of leaking it.
    if client is not None:
        client.close()
    client = MongoClient("mongodb://localhost:27017/")
    mongo_db = client['uploaded_mongo']

    # A bare list goes to a default collection; a dict is one entry per collection.
    collections = {'default_collection': data} if isinstance(data, list) else data
    for col_name, docs in collections.items():
        collection = mongo_db[col_name]
        collection.drop()
        if isinstance(docs, list):
            # insert_many() rejects an empty list, so leave the collection empty.
            if docs:
                collection.insert_many(docs)
        else:
            collection.insert_one(docs)

    def list_collections(_input=None):
        return mongo_db.list_collection_names()

    # Each tool wraps its function directly. Previously Tool objects were
    # passed as ``func`` of a second Tool, which relied on the deprecated
    # BaseTool.__call__ and hid these detailed descriptions from the agent.
    # Tool names are the ones the agent was already shown.
    tools = [
        Tool(
            name="FindDocuments",
            func=find_docs_tool_func,
            description=(
                "Use this tool to find documents in a MongoDB collection.\n"
                "Input format:\n"
                "- `collection=<collection>, key=<field>, value=<value>` for precise queries\n"
                "- OR `collection=<collection>, value=<value>` to search across all fields\n"
                "If `key` is omitted, the tool will automatically scan all fields to find matching values.\n"
                "Examples:\n"
                "- `collection=default_collection, key=name, value=Lauren Alexander`\n"
                "- `collection=default_collection, value=Lauren Alexander`"
            ),
        ),
        Tool(
            name="ListCollections",
            func=list_collections,
            description="List all collections...",
        ),
        Tool(
            name="AggregateGroupBy",
            func=aggregate_group_by,
            description=(
                "Group documents and count by any field. Format: collection=<name>, field=<group_by_field>. E.g., collection=residents, field=gender"
            ),
        ),
        Tool(
            name="GetAllDocuments",
            func=get_all_documents,
            description=(
                "Fetch all documents from a collection. Input: collection name only. Example: residents"
            ),
        ),
        Tool(
            name="FuzzyFindDocuments",
            func=fuzzy_find_documents,
            description=(
                "Fuzzy match documents across all fields in a collection. Format: collection=<name>, value=<search_term>, threshold=80 (optional)"
            ),
        ),
        Tool(
            name="JoinCollections",
            func=join_collections_tool_func,
            description=(
                "Join collections to map foreign keys to human-readable values. Supports 1 or 2-level joins.\n"
                "Formats:\n"
                "- from=Payments, key=order_id, to=Orders, match=order_id, return=status\n"
                "- from=Payments, key=order_id, to=Orders, match=order_id, next_key=user_id, next_to=Users, next_match=user_id, return=name"
            ),
        ),
        Tool(
            name="SmartJoinCollections",
            func=smart_join_router,
            description=(
                "Suggest the correct JoinCollections input format based on user intent.\n"
                "Use this when you are unsure how to form the join input."
            ),
        ),
    ]

    # NOTE(review): the documented parameters of initialize_agent are
    # ``agent`` and ``agent_kwargs``. ``agent_type=`` and ``prefix=`` below
    # are forwarded as executor keyword arguments, so they may not select the
    # conversational agent or apply ``system_message``. Confirm against the
    # installed LangChain version before changing; left as is to preserve
    # current runtime behavior.
    agent_executor = initialize_agent(
        tools=tools,
        llm=llm,
        agent_type=AgentType.CONVERSATIONAL_REACT_DESCRIPTION,
        memory=memory,
        verbose=True,
        prefix=system_message,
        handle_parsing_errors=True
    )
    db_mode = "mongo"
525
-
526
-
527
@app.errorhandler(Exception)
def handle_all_errors(e):
    """Application-wide fallback that turns any exception into a JSON error.

    HTTP exceptions keep their own description and status code. Everything
    else is reported as a generic 500 without exposing the exception text.
    """
    print(f"[ERROR] Global handler caught an exception: {str(e)}")
    traceback.print_exc()

    if not isinstance(e, HTTPException):
        generic = {"status": "error", "message": "An unexpected error occurred"}
        return jsonify(generic), 500

    return jsonify({"status": "error", "message": e.description}), e.code
536
-
537
-
538
@app.errorhandler(TooManyRequests)
def handle_429_error(e):
    """Answer rate-limit errors with a fixed "agent is busy" JSON payload."""
    payload = {
        "status": "error",
        "message": "🚦 Agent is busy, try again after sometime.",
    }
    return jsonify(payload), 429
544
-
545
- # --- ROUTES ---
546
-
547
-
548
@app.route("/")
def index():
    """Serve the single-page front end."""
    landing_template = "app_index.html"
    return render_template(landing_template)
551
-
552
-
553
@app.route("/upload_db", methods=["POST"])
@error_safe
def upload_db():
    """Receive a database file and initialise the matching agent.

    ``.json`` uploads initialise the MongoDB agent; ``.db`` and ``.sqlite``
    uploads initialise the SQL agent. Extensions are compared
    case-insensitively for all three.

    Returns:
        200 with ``database_name`` and ``message`` on success, 400 when the
        file is missing, unnamed or of an unsupported type, and 500 when
        initialisation fails.
    """
    file = request.files.get("file")
    if not file or file.filename == "":
        return jsonify(success=False, message="No file provided"), 400

    # secure_filename() can reduce a hostile name to an empty string; saving
    # would then target the upload directory itself.
    filename = secure_filename(file.filename)
    if not filename:
        return jsonify(success=False, message="No file provided"), 400

    # Reject unsupported types before anything is written to disk.
    base_name, extension = os.path.splitext(filename)
    extension = extension.lower()
    if extension not in (".json", ".db", ".sqlite"):
        return jsonify(success=False, message="Unsupported file format"), 400

    path = os.path.join(app.config["UPLOAD_FOLDER"], filename)
    file.save(path)

    try:
        if extension == ".json":
            init_mongo_agent(path)
            # init_mongo_agent() rebinds the module-level mongo_db; fall back
            # to the file name if it carries no name.
            db_name = getattr(mongo_db, "name", None) or base_name
            return jsonify({"database_name": db_name, "message": "MongoDB initialized"}), 200

        # SQL DB (.db or .sqlite)
        init_sql_agent(path)
        return jsonify({"database_name": base_name, "message": "SQL DB initialized"}), 200
    except Exception as e:
        traceback.print_exc()
        return jsonify(success=False, message=f"Init failed: {e}"), 500
584
-
585
@app.route("/generate", methods=["POST"])
@error_safe
def generate():
    """Run the active agent on the posted prompt and return its answer.

    Expects a JSON body with a non-empty ``prompt`` string.

    Returns:
        200 with ``final_answer`` and ``prompt`` on success; 400 for invalid
        input, a missing prompt, or when no database has been uploaded yet;
        500 when the agent raises.
    """
    try:
        data = request.get_json(force=True) or {}
        prompt = data.get("prompt", "").strip()
    except Exception:
        traceback.print_exc()
        return jsonify({"status": "error", "message": "Invalid input"}), 400
    if not prompt:
        return jsonify({"status": "error", "message": "Prompt is required"}), 400

    # agent_executor stays None until an upload succeeds. Report that as a
    # client error instead of a 500 carrying an AttributeError message.
    if agent_executor is None:
        return jsonify({
            "prompt": prompt,
            "final_answer": "",
            "message": "No database loaded. Please upload a database file first.",
        }), 400

    try:
        # Invoke the agent synchronously.
        result = agent_executor.invoke({"input": prompt})

        # Normalise the answer: agents may use different output keys.
        if isinstance(result, dict):
            final_answer = (
                result.get("final_answer")
                or result.get("output")
                or result.get("answer")
                or result.get("text")
                or ""
            )
        else:
            final_answer = str(result or "")

        # Best effort: also push the answer to socket listeners.
        try:
            socketio.emit("final", {"message": final_answer})
        except Exception:
            app.logger.debug("socket emit failed, continuing")

        return jsonify({"final_answer": final_answer, "prompt": prompt}), 200

    except Exception as e:
        app.logger.exception("Agent invocation failed")
        return jsonify({"prompt": prompt, "final_answer": "", "message": f"Agent error: {str(e)[:200]}"}), 500
627
-
628
-
629
if __name__ == "__main__":
    # Debug mode enables the interactive debugger, which must not be exposed
    # on a reachable host. It stays on by default, as before, but can be
    # turned off with FLASK_DEBUG=0 (or "false"/"no").
    debug_enabled = os.getenv("FLASK_DEBUG", "1").strip().lower() not in ("0", "false", "no")
    socketio.run(app, debug=debug_enabled)
 
 
1
+ # --- IMPORTS ---
2
+ from werkzeug.exceptions import TooManyRequests
3
+ from flask import Flask, request, jsonify, render_template
4
+ from flask_socketio import SocketIO, emit
5
+ from langchain_google_genai import ChatGoogleGenerativeAI
6
+ from langchain.agents import initialize_agent, AgentType, create_react_agent, AgentExecutor
7
+ from langchain_community.agent_toolkits import create_sql_agent, SQLDatabaseToolkit
8
+ from langchain_community.utilities import SQLDatabase
9
+ from langchain.tools import Tool
10
+ from langchain.memory import ConversationBufferMemory
11
+ from pymongo import MongoClient
12
+ import threading
13
+ import os, uuid
14
+ import re
15
+ import traceback
16
+ import ast
17
+ from bson import json_util
18
+ from dotenv import load_dotenv
19
+ from werkzeug.utils import secure_filename
20
+ from werkzeug.exceptions import HTTPException
21
+ from langchain.prompts import ChatPromptTemplate
22
+ from tabulate import tabulate
23
+ from fuzzywuzzy import fuzz
24
+ # from langchain_groq import ChatGroq
25
+ from datetime import datetime
26
+
27
def error_safe(f):
    """Wrap a view function so that every exception becomes a JSON error response.

    ``HTTPException`` instances are reported with their own description and
    status code. Any other exception is printed with its traceback and
    reported as a 500 whose message is ``str(exception)``.

    Args:
        f: The callable to protect.

    Returns:
        A wrapper that carries the metadata of ``f`` (name, docstring, module,
        ``__wrapped__``).
    """
    # Imported locally so the block does not depend on a file-level import.
    from functools import wraps

    # wraps() copies __name__ (which Flask uses as the endpoint name) as well
    # as __doc__, __module__ and __qualname__, instead of __name__ alone.
    @wraps(f)
    def wrapper(*args, **kwargs):
        try:
            return f(*args, **kwargs)
        except HTTPException as he:
            return jsonify({"status": "error", "message": he.description}), he.code
        except Exception as e:
            print("[ERROR] Uncaught Exception in", f.__name__)
            traceback.print_exc()
            return jsonify({"status": "error", "message": str(e)}), 500

    return wrapper
39
+
40
+
41
# --- ENV + FLASK SETUP ---
load_dotenv()

# os.environ only accepts str values, so assigning the result of os.getenv()
# directly raises an opaque TypeError when the key is missing. Fail with an
# explicit message instead.
_gemini_api_key = os.getenv("GEMINI_API_KEY")
if _gemini_api_key is None:
    raise RuntimeError(
        "GEMINI_API_KEY is not set; define it in the environment or in the .env file."
    )
os.environ["GEMINI_API_KEY"] = _gemini_api_key

app = Flask(__name__)
# A fresh random secret is generated on every start.
app.config['SECRET_KEY'] = os.urandom(32)
app.config['UPLOAD_FOLDER'] = 'uploads'
socketio = SocketIO(app, cors_allowed_origins="*")
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

llm = ChatGoogleGenerativeAI(
    temperature=0.2,
    model="gemini-2.0-flash",
    max_retries=50,
    api_key=_gemini_api_key,
)
# llm = ChatGroq(temperature=0.2, model_name="mistral-saba-24b",api_key=os.getenv("GROQ_API_KEY"))

# --- GLOBALS ---
# Rebound by init_sql_agent() / init_mongo_agent() when a database is uploaded.
agent_executor = None
memory = ConversationBufferMemory(
    memory_key="chat_history", return_messages=True, input_key="input")
mongo_db = None
client = None
db_mode = None  # "mongo" or "sql"
66
+
67
+ # --- SHARED ---
68
+
69
+
70
def is_schema_request(prompt: str) -> bool:
    """Tell whether ``prompt`` contains a schema/structure keyword.

    Matching is case-insensitive and anchored on word boundaries, so
    "show" matches but "showing" does not.
    """
    keyword_regex = r'\b(schema|table names|tables|columns|structure|column names|collections?|field names|metadata|describe|show)\b'
    found = re.search(keyword_regex, prompt, flags=re.IGNORECASE)
    return found is not None
74
+
75
+
76
def is_sensitive_request(prompt: str) -> bool:
    """Tell whether ``prompt`` contains any sensitive keyword.

    The check is a case-insensitive substring test, so a keyword also
    matches when it appears inside a longer word.
    """
    text = prompt.lower()
    watched_terms = (
        "password", "token", "credential", "secret", "api key", "schema", "structure",
        "collection name", "field name", "user_id", "order_id", "payment_id",
        "internal", "database structure", "table structure", "email", "phone", "contact", "ssn",
    )
    for term in watched_terms:
        if term in text:
            return True
    return False
84
+
85
+
86
# Prompt + model chain that asks the LLM to classify a user prompt as a
# schema/structure/sensitive-information request (YES) or not (NO).
intent_prompt = ChatPromptTemplate.from_messages([
    ("system", "Classify if the user is asking schema/structure/sensitive info (tables, columns, schema): YES or NO."),
    ("human", "{prompt}")
])
intent_checker = intent_prompt | llm


def is_schema_leak_request(prompt):
    """Ask the LLM whether ``prompt`` is a schema/structure/sensitive-info request.

    Args:
        prompt: The raw user prompt.

    Returns:
        True when the classifier's reply contains "yes" (case-insensitive).
        False otherwise, including when the classification call fails.
    """
    try:
        classification = intent_checker.invoke({"prompt": prompt})
        return "yes" in classification.content.strip().lower()
    except Exception:
        # A bare ``except:`` would also swallow KeyboardInterrupt/SystemExit.
        # Keep the "treat as not a leak" fallback, but leave a trace so that
        # classifier outages are visible.
        traceback.print_exc()
        return False
99
+
100
+ # --- INIT SQL AGENT ---
101
def init_sql_agent(db_path):
    """Build the SQL agent for the SQLite file at ``db_path``.

    Rebinds the module-level ``agent_executor`` to the new agent and sets
    ``db_mode`` to ``"sql"``.
    """
    global agent_executor, db_mode

    agent_prefix = '''You are a helpful SQL expert agent that ALWAYS returns natural language answers using the tools.'''
    # NOTE: a longer rule set used to follow this sentence and is currently
    # disabled. It asked for Markdown formatting, forbade showing SQL, table
    # or column names, restricted the agent to SELECT-style reads, and gave
    # fixed refusal / "no data" messages. Only the sentence above is sent.

    sqlite_db = SQLDatabase.from_uri(f"sqlite:///{db_path}")
    sql_toolkit = SQLDatabaseToolkit(db=sqlite_db, llm=llm)

    agent_executor = create_sql_agent(
        llm=llm,
        toolkit=sql_toolkit,
        verbose=True,
        prefix=agent_prefix,
        agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        memory=memory,
        agent_executor_kwargs={"handle_parsing_errors": True},
    )
    db_mode = "sql"
163
+
164
+ # --- INIT MONGO AGENT ---
165
# Short prefix prompt for the MongoDB agent; the longer instruction set that
# used to follow it is kept below as comments.
system_message = (
    "\n"
    "You are **MongoDBQueryBot**, a highly intelligent and accurate assistant for answering questions about data stored in a MongoDB database using tools.\n"
)
168
+
169
+ # ### 🚨 Critical Instructions (Strictly Follow These):
170
+ # - You **must always** use tools provided to answer user questions.
171
+ # - Always join IDs with associated human-readable values like names or titles when answering.
172
+ # - Prefer displaying `user name`, `employee name`, or `product name` instead of internal IDs like `user_id`, `emp_id`, or `product_id`.
173
+ # - Avoid responding only with technical identifiers. Make responses meaningful to users.
174
+ # - **Never** guess or fabricate any information.
175
+ # - **Do not** show raw JSON, field names, or database structure.
176
+ # - Your role is **read-only**: do not suggest or perform insert/update/delete.
177
+ # - After using all the available tools, if you are unable to find any documents, then give the following ANSWER:
178
+ # "Please, rephrase your query because I can't exactly understand, what you want !"
179
+ # - If a query can't be answered or is unrelated to reading data, reply:
180
+ # ❌ "I'm only allowed to retrieve data. Please ask a query involving reading information."
181
+ # - IF USER ASK ABOUT DATA, Which is not there in a database, then GIVE FOLLOWING ANSWER:
182
+ # "There is no such data in the Database."
183
+ # - When returning answers:
184
+ # - Do **not return internal IDs** like `user_id`, `order_id`, `payment_id`, etc.
185
+ # - Instead, use human-readable fields like `name`, `full_name`, `user_name`, etc., from related collections.
186
+ # - If only an ID is available, try joining the relevant collections to fetch the proper display name.
187
+
188
+ # ### 🧠 How to Think:
189
+ # - Understand **exactly** what the user is trying to ask. Do not answer if unclear — ask for clarification.
190
+ # - Translate the user prompt into tool inputs by identifying:
191
+ # - Which collection to search
192
+ # - What value or field they're referring to
193
+ # - The correct format expected by the tool
194
+
195
+ # ### 🛠️ Tool Usage Guide:
196
+ # - Use `FindDocuments` for queries like:
197
+ # - "Show me all employees named John"
198
+ # - "What is the salary of Manager X?"
199
+ # - Use `ListCollections` to discover available data types (but don’t share them directly).
200
+ # - **IMPORTANT: Don't iterate on only one tool; if you are unable to answer using the current tool, then switch to another tool!**
201
+ # - Use `JoinCollections` to resolve IDs into names when the question asks about people, customers, or products.
202
+ # - When resolving names from payments, use this format:
203
+ # `from=Payments, key=order_id, to=Orders, match=order_id, next_key=user_id, next_to=Users, next_match=user_id, return=name`
204
+
205
+ # - Your goal is to **return the person's name** (e.g., `name`, `user_name`, `full_name`) not their ID.
206
+ # - Always prioritize returning names instead of internal identifiers.
207
+ # - Examples:
208
+ # - For payment-related questions → Join Payments → Orders → Users and return name
209
+ # - For order questions → Join Orders → Users and return user names
210
+
211
+ # ### 🧾 Response Format:
212
+ # - Use **clear markdown with tables** when displaying data.
213
+ # - If no data is found: return `**No documents found.**`
214
+ # - Stay professional, brief, and relevant.
215
+
216
+ # ### 🚫 Never Do This:
217
+ # - Do not leak MongoDB structure, schema, or field names.
218
+ # - Do not suggest code, MongoDB syntax, or field mappings.
219
+ # - Do not hallucinate or make assumptions.
220
+
221
+ # Start by analyzing the prompt carefully, select the right tool, invoke it, and return a user-friendly answer based on the result.
222
+ # """
223
+
224
+
225
def find_docs_tool_func(query: str) -> str:
    """Search MongoDB for documents, falling back to the other collections.

    The requested collection is queried first. If it yields nothing, every
    other collection in the database is tried in turn and the first non-empty
    result is returned.

    Input format (comma-separated ``name=value`` pairs):
      - ``collection=<collection>, key=<field>, value=<value>``: exact match
        on one field.
      - ``collection=<collection>, value=<value>``: case-insensitive match of
        the value against the string form of every field.
      - ``collection=<collection>``: every document.

    Returns a GitHub-style table wrapped in the tool's markdown marker,
    ``**No documents found.**`` when nothing matches anywhere, or an error
    message string (this function never raises).
    """
    try:
        # Strip both the name and the value of each pair so that inputs such
        # as "collection = users" are understood instead of being reported as
        # a missing collection.
        parts = {}
        for part in query.split(","):
            if "=" not in part:
                continue
            name, _, raw = part.partition("=")
            parts[name.strip()] = raw.strip()

        collection = parts.get("collection")
        key = parts.get("key")
        value = parts.get("value")
        if not collection:
            return "❌ 'collection' is required."

        # Lowercase the search term once instead of once per field.
        needle = value.lower() if value else None

        def query_collection(coll_name):
            if key and value:
                return list(mongo_db[coll_name].find({key: value}, {'_id': 0}))
            if value:
                return [
                    doc for doc in mongo_db[coll_name].find({}, {'_id': 0})
                    if any(str(v).lower() == needle for v in doc.values())
                ]
            return list(mongo_db[coll_name].find({}, {'_id': 0}))

        def render(docs):
            return "\n markdown\n" + tabulate(docs, headers="keys", tablefmt="github") + "\n"

        docs = query_collection(collection)
        if docs:
            return render(docs)

        # Fallback: the value may live in a different collection.
        for coll in mongo_db.list_collection_names():
            if coll == collection:
                continue
            docs = query_collection(coll)
            if docs:
                return render(docs)

        return "**No documents found.**"
    except Exception as e:
        return f"Invalid input format or error: {str(e)}"
265
+
266
+
267
def aggregate_group_by(_input: str):
    """Count the documents of a collection grouped by one field.

    Accepted inputs:
      - a dict literal such as ``{"collection": "residents", "field": "gender"}``
        (``collection_name`` and ``group_by`` are accepted as aliases);
      - the legacy form ``collection=<name>, field=<group_by_field>``.

    Returns a GitHub-style table with one row per distinct value and its
    count, ``**No data found.**`` for an empty result, or an
    ``Aggregation failed: ...`` message (this function never raises).
    """
    try:
        text = _input.strip()
        if text.startswith("{"):
            # Parse the dict literal without evaluating arbitrary code.
            args = ast.literal_eval(text)
            collection = args.get("collection_name") or args.get("collection")
            field = args.get("group_by") or args.get("field")
        else:
            # Legacy "name=value, name=value" form. Names and values are
            # stripped because the documented format puts a space after the
            # comma, which previously produced the key " field".
            args = {}
            for part in text.split(","):
                if "=" not in part:
                    continue
                name, _, raw = part.partition("=")
                args[name.strip()] = raw.strip()
            collection = args.get("collection")
            field = args.get("field")

        if not collection or not field:
            # Without this guard a missing field would group by "$None".
            return "Aggregation failed: both 'collection' and 'field' are required."

        pipeline = [
            {"$group": {"_id": f"${field}", "count": {"$sum": 1}}},
            {"$project": {"_id": 0, field: "$_id", "count": 1}}
        ]
        result = list(mongo_db[collection].aggregate(pipeline))
        if not result:
            return "**No data found.**"
        return "\n markdown\n" + tabulate(result, headers="keys", tablefmt="github") + "\n"
    except Exception as e:
        return f"Aggregation failed: {e}"
290
+
291
+
292
def get_all_documents(collection: str):
    """Dump every document of ``collection`` as a GitHub-style table.

    The Mongo ``_id`` field is excluded. Returns ``**No documents found.**``
    for an empty collection and an ``Error fetching documents: ...`` message
    if anything goes wrong.
    """
    try:
        cursor = mongo_db[collection].find({}, {'_id': 0})
        rows = list(cursor)
        if rows:
            table = tabulate(rows, headers="keys", tablefmt="github")
            return "\n markdown\n" + table + "\n"
        return "**No documents found.**"
    except Exception as exc:
        return f"Error fetching documents: {exc}"
300
+
301
+
302
def fuzzy_find_documents(query: str):
    """Return the documents of a collection that fuzzily match a search term.

    Input format: ``collection=<name>, value=<search_term>, threshold=80``
    where ``threshold`` is optional and defaults to 80. A document matches
    when the partial ratio between the lowercased string form of any of its
    fields and the lowercased search term reaches the threshold.

    Returns a GitHub-style table, ``**No fuzzy matches found.**``, or a
    ``Fuzzy match error: ...`` message (this function never raises).
    """
    try:
        # Tolerant parsing: ignore fragments without "=" (e.g. a trailing
        # comma) and strip whitespace around both names and values.
        parts = {}
        for part in query.split(","):
            if "=" not in part:
                continue
            name, _, raw = part.partition("=")
            parts[name.strip()] = raw.strip()

        collection = parts["collection"]
        value = parts["value"]
        threshold = int(parts.get("threshold", 80))

        # Lowercase the search term once rather than for every field compared.
        needle = value.lower()
        matches = [
            doc for doc in mongo_db[collection].find({}, {'_id': 0})
            if any(fuzz.partial_ratio(str(v).lower(), needle) >= threshold
                   for v in doc.values())
        ]
        if not matches:
            return "**No fuzzy matches found.**"
        return "\n markdown\n" + tabulate(matches, headers="keys", tablefmt="github") + "\n"
    except Exception as e:
        return f"Fuzzy match error: {e}"
318
+
319
+ # def join_collections_tool_func(_input: str):
320
+ # try:
321
+ # # Parse input like: from=Products, key=category_id, to=Categories, match=category_id, return=category_name
322
+ # args = dict(x.strip().split("=", 1) for x in _input.split(","))
323
+ # from_collection = args["from"]
324
+ # foreign_key = args["key"]
325
+ # to_collection = args["to"]
326
+ # match_key = args["match"]
327
+ # return_field = args["return"]
328
+
329
+ # results = []
330
+ # foreign_lookup = {
331
+ # doc[match_key]: doc.get(return_field)
332
+ # for doc in mongo_db[to_collection].find()
333
+ # if match_key in doc
334
+ # }
335
+
336
+ # for doc in mongo_db[from_collection].find({}, {'_id': 0}):
337
+ # doc[return_field] = foreign_lookup.get(doc.get(foreign_key), "Unknown")
338
+ # results.append(doc)
339
+
340
+ # if not results:
341
+ # return "**No documents found.**"
342
+
343
+ # return "\n markdown\n" + tabulate(results, headers="keys", tablefmt="github") + "\n"
344
+
345
+ # except Exception as e:
346
+ # return f"Join failed: {e}"
347
+
348
+
349
def join_collections_tool_func(_input: str):
    """Resolve foreign keys to a readable field through a 1- or 2-level join.

    Input formats (comma-separated ``name=value`` pairs):
      - ``from=Payments, key=order_id, to=Orders, match=order_id,
        next_key=user_id, next_to=Users, next_match=user_id, return=name``
      - ``from=Products, key=category_id, to=Categories, match=category_id,
        return=category_name``

    ``from``, ``key``, ``to``, ``match`` and ``return`` are required. The
    second hop only runs when ``next_key``, ``next_to`` and ``next_match``
    are all present.

    Returns a one-column GitHub-style table of the ``return`` field,
    ``**No documents found.**`` when the join produces no usable rows, or a
    ``Join failed: ...`` message (this function never raises).
    """
    try:
        # Tolerant parsing: skip fragments without "=" and strip whitespace
        # around names and values so "from = Payments" is accepted.
        args = {}
        for part in _input.split(","):
            if "=" not in part:
                continue
            name, _, raw = part.partition("=")
            args[name.strip()] = raw.strip()

        from_coll = args["from"]
        key = args["key"]
        to_coll = args["to"]
        match = args["match"]
        return_field = args["return"]

        next_key = args.get("next_key")
        next_to = args.get("next_to")
        next_match = args.get("next_match")

        # First join (e.g., Payments → Orders): index the target by match key.
        to_docs = {doc[match]: doc for doc in mongo_db[to_coll].find()
                   if match in doc}
        joined = []
        for doc in mongo_db[from_coll].find({}, {'_id': 0}):
            foreign_doc = to_docs.get(doc.get(key))
            if foreign_doc:
                # Fields from the joined document win on name clashes.
                joined.append({**doc, **foreign_doc})

        if not joined:
            return "**No documents found.**"

        # Second join (e.g., Orders → Users)
        if next_key and next_to and next_match:
            next_docs = {doc[next_match]: doc for doc in mongo_db[next_to].find()
                         if next_match in doc}
            for doc in joined:
                user_doc = next_docs.get(doc.get(next_key))
                if user_doc:
                    doc[return_field] = user_doc.get(return_field, "Unknown")
                else:
                    doc[return_field] = "Unknown"

        final = [{return_field: doc.get(return_field)}
                 for doc in joined if return_field in doc]
        if not final:
            # None of the joined rows carries the requested field; report
            # that instead of emitting an empty table wrapper.
            return "**No documents found.**"
        return "\n markdown\n" + tabulate(final, headers="keys", tablefmt="github") + "\n"

    except Exception as e:
        return f"Join failed: {e}"
399
+
400
+
401
def smart_join_router(prompt: str) -> str:
    """Suggest a ready-made JoinCollections input for common question shapes.

    Matching is keyword based and case-insensitive:
      - a payment question that also mentions a person ("who", "name",
        "user", "person") gets the Payments → Orders → Users path;
      - an order question that mentions "name" gets the Orders → Users path;
      - anything else gets a message asking for more context.
    """
    text = prompt.lower()
    person_terms = ("who", "name", "user", "person")
    asks_about_person = any(term in text for term in person_terms)

    if "payment" in text and asks_about_person:
        return "from=Payments, key=order_id, to=Orders, match=order_id, next_key=user_id, next_to=Users, next_match=user_id, return=name"

    if "order" in text and "name" in text:
        return "from=Orders, key=user_id, to=Users, match=user_id, return=name"

    # Extend with further routes as needed.
    return "Unable to auto-generate join path. Please provide more context."
413
+
414
+
415
def init_mongo_agent(json_path):
    """Load an uploaded JSON dump into MongoDB and build the Mongo agent.

    The file at ``json_path`` is parsed with ``bson.json_util`` and written to
    the ``uploaded_mongo`` database on the local MongoDB server:
      - a top-level list replaces ``default_collection``;
      - a top-level dict replaces one collection per key (a list value is
        inserted as many documents, any other value as a single document).

    Sets the module globals ``client``, ``mongo_db``, ``agent_executor`` and
    ``db_mode`` (to ``"mongo"``).

    Raises:
        ValueError: if the JSON root is neither a list nor a dict.
    """
    global agent_executor, client, mongo_db, db_mode

    client = MongoClient("mongodb://localhost:27017/")
    mongo_db = client['uploaded_mongo']
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json_util.loads(f.read())

    # Handle both single-collection and multi-collection formats
    if isinstance(data, list):
        # Default collection name if only a list is provided
        collection = mongo_db['default_collection']
        collection.drop()
        if data:  # insert_many rejects an empty list
            collection.insert_many(data)
    elif isinstance(data, dict):
        for col_name, docs in data.items():
            collection = mongo_db[col_name]
            collection.drop()
            if isinstance(docs, list):
                if docs:  # insert_many rejects an empty list
                    collection.insert_many(docs)
            else:
                collection.insert_one(docs)
    else:
        raise ValueError("Unsupported JSON format. Must be a list or dict.")

    def list_collections(_input=None):
        return mongo_db.list_collection_names()

    find_docs_tool = Tool(
        name="FindDocuments",
        description=(
            "Use this tool to find documents in a MongoDB collection.\n"
            "Input format:\n"
            "- `collection=<collection>, key=<field>, value=<value>` for precise queries\n"
            "- OR `collection=<collection>, value=<value>` to search across all fields\n"
            "If `key` is omitted, the tool will automatically scan all fields to find matching values.\n"
            "Examples:\n"
            "- `collection=default_collection, key=name, value=Lauren Alexander`\n"
            "- `collection=default_collection, value=Lauren Alexander`"
        ),
        func=find_docs_tool_func)

    list_collections_tool = Tool(
        name="ListCollections",
        func=lambda x: list_collections(),
        description="List all collections...")

    aggregate_tool = Tool(
        name="AggregateGroupBy",
        func=aggregate_group_by,
        description=(
            "Group documents and count by any field. Format: collection=<name>, field=<group_by_field>. E.g., collection=residents, field=gender"
        )
    )
    get_all_documents_tool = Tool(
        name="GetAllDocuments",
        func=get_all_documents,
        description=(
            "Fetch all documents from a collection. Input: collection name only. Example: residents"
        )
    )

    fuzzy_tool = Tool(
        name="FuzzyFindDocuments",
        func=fuzzy_find_documents,
        description=("Fuzzy match documents across all fields in a collection. Format: collection=<name>, value=<search_term>, threshold=80 (optional)"
                     )
    )

    join_collection_tool = Tool(
        name="JoinCollections",
        func=join_collections_tool_func,
        description=(
            "Join collections to map foreign keys to human-readable values. Supports 1 or 2-level joins.\n"
            "Formats:\n"
            "- from=Payments, key=order_id, to=Orders, match=order_id, return=status\n"
            "- from=Payments, key=order_id, to=Orders, match=order_id, next_key=user_id, next_to=Users, next_match=user_id, return=name"
        )
    )
    smart_router_tool = Tool(
        name="SmartJoinRouter",
        func=smart_join_router,
        description=(
            "Suggest the correct JoinCollections input format based on user intent.\n"
            "Use this when you are unsure how to form the join input."
        )
    )

    # Register the Tool objects themselves. They were previously wrapped as
    # the `func` of second Tool objects with placeholder descriptions, which
    # hid the input formats from the agent and registered the router under a
    # different name ("SmartJoinCollections").
    tools = [
        find_docs_tool,
        list_collections_tool,
        aggregate_tool,
        get_all_documents_tool,
        fuzzy_tool,
        join_collection_tool,
        smart_router_tool,
    ]

    # NOTE(review): initialize_agent's documented keyword for choosing the
    # agent is `agent`, and prompt overrides go through
    # `agent_kwargs={"prefix": ...}`. It looks like `agent_type` and `prefix`
    # below are forwarded to the executor instead, so the default agent and
    # prompt would be used. Left unchanged because the conversational agent
    # depends on how `memory` is configured, which is outside this block;
    # confirm before changing.
    agent_executor = initialize_agent(
        tools=tools,
        llm=llm,
        agent_type=AgentType.CONVERSATIONAL_REACT_DESCRIPTION,
        memory=memory,
        verbose=True,
        prefix=system_message,
        handle_parsing_errors=True
    )
    db_mode = "mongo"
525
+
526
+
527
@app.errorhandler(Exception)
def handle_all_errors(e):
    """Log any unhandled exception and turn it into a JSON error response.

    HTTP exceptions keep their own description and status code; everything
    else is reported as a generic 500 without exposing the exception text.
    """
    print(f"[ERROR] Global handler caught an exception: {str(e)}")
    traceback.print_exc()

    if not isinstance(e, HTTPException):
        generic = {"status": "error", "message": "An unexpected error occurred"}
        return jsonify(generic), 500

    http_error = {"status": "error", "message": e.description}
    return jsonify(http_error), e.code
536
+
537
+
538
@app.errorhandler(TooManyRequests)
def handle_429_error(e):
    """Answer rate-limit errors with a friendly JSON message and HTTP 429."""
    payload = {
        "status": "error",
        "message": "🚦 Agent is busy, try again after sometime.",
    }
    return jsonify(payload), 429
544
+
545
+ # --- ROUTES ---
546
+
547
+
548
@app.route("/")
def index():
    """Serve the application's landing page."""
    page = render_template("app_index.html")
    return page
551
+
552
+
553
@app.route("/upload_db", methods=["POST"])
@error_safe
def upload_db():
    """Accept an uploaded database file and initialise the matching agent.

    Expects a multipart form field named ``file``. ``.json`` files are loaded
    into MongoDB; ``.db`` / ``.sqlite`` files are opened as SQL databases.
    The extension check is case-insensitive for every supported format.

    Responses:
        200: ``{"database_name": ..., "message": ...}`` on success.
        400: no file, unusable file name, or unsupported extension.
        500: the agent initialiser raised.
    """
    file = request.files.get("file")
    if not file or file.filename == "":
        return jsonify(success=False, message="No file provided"), 400

    filename = secure_filename(file.filename)
    if not filename:
        # secure_filename can reduce a hostile or odd name to an empty
        # string, which would make the save path the upload folder itself.
        return jsonify(success=False, message="No file provided"), 400

    lowered = filename.lower()
    is_mongo = lowered.endswith(".json")
    is_sql = lowered.endswith((".db", ".sqlite"))
    if not (is_mongo or is_sql):
        # Reject before saving so unsupported uploads never reach the disk.
        return jsonify(success=False, message="Unsupported file format"), 400

    os.makedirs(app.config["UPLOAD_FOLDER"], exist_ok=True)
    path = os.path.join(app.config["UPLOAD_FOLDER"], filename)
    file.save(path)

    try:
        if is_mongo:
            init_mongo_agent(path)
            # Prefer the real Mongo database name; fall back to the file stem.
            mongo_db = globals().get("mongo_db")
            db_name = getattr(mongo_db, "name", None) or os.path.splitext(filename)[0]
            return jsonify({"database_name": db_name, "message": "MongoDB initialized"}), 200

        # SQL DB (.db or .sqlite)
        init_sql_agent(path)
        db_name = os.path.splitext(filename)[0]
        return jsonify({"database_name": db_name, "message": "SQL DB initialized"}), 200
    except Exception as e:
        traceback.print_exc()
        return jsonify(success=False, message=f"Init failed: {e}"), 500
584
+
585
@app.route("/generate", methods=["POST"])
@error_safe
def generate():
    """Run the active agent on a prompt and return its final answer.

    Expects a JSON body with a non-empty ``prompt`` string.

    Responses:
        200: ``{"final_answer": ..., "prompt": ...}``; the answer is also
             emitted on the ``final`` Socket.IO event (best effort).
        400: missing or invalid prompt, or no database uploaded yet.
        500: the agent raised; ``message`` carries a truncated error text.
    """
    try:
        data = request.get_json(force=True) or {}
        prompt = data.get("prompt", "").strip()
        if not prompt:
            return jsonify({"status": "error", "message": "Prompt is required"}), 400
    except Exception:
        traceback.print_exc()
        return jsonify({"status": "error", "message": "Invalid input"}), 400

    # The agent only exists after a successful upload. Answer with a clear
    # 400 instead of letting a NameError/AttributeError surface as a 500.
    executor = globals().get("agent_executor")
    if executor is None:
        return jsonify({
            "status": "error",
            "message": "No database loaded. Please upload a database first.",
            "prompt": prompt,
            "final_answer": "",
        }), 400

    try:
        # invoke the agent synchronously
        result = executor.invoke({"input": prompt})

        # Normalize the agent output: different agents use different keys.
        if isinstance(result, dict):
            final_answer = (
                result.get("final_answer")
                or result.get("output")
                or result.get("answer")
                or result.get("text")
                or ""
            )
        else:
            final_answer = str(result or "")

        # Keep emitting to the socket so Socket.IO clients still get the
        # answer; a failed emit must not fail the HTTP response.
        try:
            socketio.emit("final", {"message": final_answer})
        except Exception:
            app.logger.debug("socket emit failed, continuing")

        return jsonify({"final_answer": final_answer, "prompt": prompt}), 200

    except Exception as e:
        app.logger.exception("Agent invocation failed")
        return jsonify({"prompt": prompt, "final_answer": "", "message": f"Agent error: {str(e)[:200]}"}), 500
627
+
628
+
629
if __name__ == "__main__":
    # Listen on all interfaces on port 7860. For local debugging,
    # socketio.run(app, debug=True) can be used instead.
    socketio.run(
        app,
        host="0.0.0.0",
        port=7860,
        allow_unsafe_werkzeug=True,
    )