prthm11 committed on
Commit
28d340c
·
verified ·
1 Parent(s): 1767a62

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +630 -0
app.py ADDED
@@ -0,0 +1,630 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # --- IMPORTS ---
2
+ from werkzeug.exceptions import TooManyRequests
3
+ from flask import Flask, request, jsonify, render_template
4
+ from flask_socketio import SocketIO, emit
5
+ from langchain_google_genai import ChatGoogleGenerativeAI
6
+ from langchain.agents import initialize_agent, AgentType, create_react_agent, AgentExecutor
7
+ from langchain_community.agent_toolkits import create_sql_agent, SQLDatabaseToolkit
8
+ from langchain_community.utilities import SQLDatabase
9
+ from langchain.tools import Tool
10
+ from langchain.memory import ConversationBufferMemory
11
+ from pymongo import MongoClient
12
+ import threading
13
+ import os, uuid
14
+ import re
15
+ import traceback
16
+ import ast
17
+ from bson import json_util
18
+ from dotenv import load_dotenv
19
+ from werkzeug.utils import secure_filename
20
+ from werkzeug.exceptions import HTTPException
21
+ from langchain.prompts import ChatPromptTemplate
22
+ from tabulate import tabulate
23
+ from fuzzywuzzy import fuzz
24
+ # from langchain_groq import ChatGroq
25
+ from datetime import datetime
26
+
27
def error_safe(f):
    """Wrap a view so that any exception it raises becomes a JSON error response.

    An ``HTTPException`` is reported with its own description and status
    code.  Any other exception is printed to stdout together with its
    traceback and reported as a 500 whose message is ``str(exception)``.

    Args:
        f: The view function to protect.

    Returns:
        A wrapper around ``f`` that carries ``f``'s name, docstring and other
        metadata, so registering it as a route still uses the original name.
    """
    # Local import keeps this decorator self-contained.
    from functools import wraps

    @wraps(f)
    def wrapper(*args, **kwargs):
        try:
            return f(*args, **kwargs)
        except HTTPException as he:
            return jsonify({"status": "error", "message": he.description}), he.code
        except Exception as e:
            print("[ERROR] Uncaught Exception in", f.__name__)
            traceback.print_exc()
            return jsonify({"status": "error", "message": str(e)}), 500

    return wrapper
39
+
40
+
41
+ # --- ENV + FLASK SETUP ---
42
load_dotenv()
# Mirror the key back into the environment only when it is actually set:
# assigning None to os.environ raises TypeError and would abort the import.
_gemini_api_key = os.getenv("GEMINI_API_KEY")
if _gemini_api_key is not None:
    os.environ["GEMINI_API_KEY"] = _gemini_api_key

app = Flask(__name__)
# A fresh random secret is generated on every process start.
app.config['SECRET_KEY'] = os.urandom(32)
app.config['UPLOAD_FOLDER'] = 'uploads'
# NOTE(review): cors_allowed_origins="*" accepts Socket.IO connections from any origin.
socketio = SocketIO(app, cors_allowed_origins="*")
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

llm = ChatGoogleGenerativeAI(
    temperature=0.2,
    model="gemini-2.0-flash",
    max_retries=50,
    api_key=_gemini_api_key
)
# llm = ChatGroq(temperature=0.2, model_name="mistral-saba-24b",api_key=os.getenv("GROQ_API_KEY"))

# --- GLOBALS ---
# Rebound by init_sql_agent / init_mongo_agent when a database is uploaded.
agent_executor = None
# One conversation memory object shared by whichever agent is active.
memory = ConversationBufferMemory(
    memory_key="chat_history", return_messages=True, input_key="input")
mongo_db = None
client = None
db_mode = None  # "mongo" or "sql"
66
+
67
+ # --- SHARED ---
68
+
69
+
70
def is_schema_request(prompt: str) -> bool:
    """Return True when *prompt* contains a schema/structure keyword as a whole word.

    Matching is case-insensitive.
    """
    schema_terms = r'\b(schema|table names|tables|columns|structure|column names|collections?|field names|metadata|describe|show)\b'
    return re.search(schema_terms, prompt, re.IGNORECASE) is not None
74
+
75
+
76
def is_sensitive_request(prompt: str) -> bool:
    """Return True when *prompt* contains any sensitive keyword as a substring.

    The comparison is done on the lower-cased prompt.
    """
    haystack = prompt.lower()
    for term in (
        "password", "token", "credential", "secret", "api key", "schema", "structure",
        "collection name", "field name", "user_id", "order_id", "payment_id",
        "internal", "database structure", "table structure", "email", "phone", "contact", "ssn",
    ):
        if term in haystack:
            return True
    return False
84
+
85
+
86
# Prompt + LLM chain that asks the model for a YES/NO verdict on whether the
# user's prompt is requesting schema/structure/sensitive information.
_intent_messages = [
    ("system", "Classify if the user is asking schema/structure/sensitive info (tables, columns, schema): YES or NO."),
    ("human", "{prompt}"),
]
intent_prompt = ChatPromptTemplate.from_messages(_intent_messages)
intent_checker = intent_prompt | llm
91
+
92
+
93
def is_schema_leak_request(prompt):
    """Ask the LLM classifier whether *prompt* requests schema/sensitive info.

    Args:
        prompt: The user's text, passed to ``intent_checker`` as ``{"prompt": ...}``.

    Returns:
        True when the classifier's reply contains "yes" (case-insensitive).
        False otherwise, including when the classifier call fails for any
        reason.
    """
    try:
        classification = intent_checker.invoke({"prompt": prompt})
        return "yes" in classification.content.strip().lower()
    except Exception:
        # Best effort: a failed classification is treated as "not a leak
        # request".  Only Exception is caught so that KeyboardInterrupt and
        # SystemExit still propagate.
        return False
99
+
100
+ # --- INIT SQL AGENT ---
101
def init_sql_agent(db_path):
    """Create a SQL agent over the SQLite file at *db_path* and make it the active agent.

    Rebinds the module globals ``agent_executor`` (to the new agent) and
    ``db_mode`` (to ``"sql"``).
    """
    global agent_executor, db_mode

    sqlite_uri = f"sqlite:///{db_path}"
    sql_toolkit = SQLDatabaseToolkit(db=SQLDatabase.from_uri(sqlite_uri), llm=llm)
    prefix = '''You are a helpful SQL expert agent that ALWAYS returns natural language answers using the tools.'''
    # The longer prompt draft below is disabled; only the one-line prefix above is active.
    # Always format your responses in Markdown. For example:
    # - Use bullet points
    # - Use bold for headers
    # - Wrap code in triple backticks
    # - Tables should use Markdown table syntax

    # You must NEVER:
    # - Show or mention SQL syntax.
    # - Reveal table names, column names, or database schema.
    # - Respond with any technical details or structure of the database.
    # - Return code or tool names.
    # - Give wrong Answers.

    # You must ALWAYS:
    # - Respond in plain, friendly language.
    # - Don't Summarize the result for the user (e.g., "There are 9 tables in the system.")
    # - If asked to list table names or schema, politely refuse and respond with:
    # "I'm sorry, I can't share database structure information."
    # - ALWAYS HAVE TO SOLVE COMPLEX USER QUERIES. FOR THAT, UNDERSTAND THE PROMPT, ANALYSE PROPER AND THEN GIVE ANSWER.
    # - Your Answers should be correct, you have to do understand process well and give accurate answers.
    # - IF USER ASK ABOUT DATA, Which is not there in a database, then GIVE FOLLOWING ANSWER:
    # "There is no such data in the Database."

    # Strict Rules You MUST Follow:
    # - NEVER display or mention SQL queries.
    # - NEVER explain SQL syntax or logic.
    # - NEVER return technical or code-like responses.
    # - ONLY respond in natural, human-friendly language.
    # - You are not allow to give the name of any COLUMNS, TABLES, DATABASE, ENTITY, SYNTAX, STRUCTURE, DESIGN, ETC...

    # If the user asks for anything other than retrieving data (SELECT), respond using this exact message:
    # "I'm not allowed to perform operations other than SELECT queries. Please ask something that involves reading data."

    # Do not return SQL queries or raw technical responses to the user.

    # For example:
    # Wrong: SELECT * FROM ...
    # Correct: The user assigned to the cart is Alice Smith.

    # Use the tools provided to get the correct data from the database and summarize the response clearly.
    # If the input is unclear or lacks sufficient data, ask for clarification using the SubmitFinalAnswer tool.
    # Never return SQL queries as your response.

    # If you cannot find an answer,
    # Double-check your query and running it again.
    # - If a query fails, revise and try again.
    # - Else 'No data found' using SubmitFinalAnswer.No SQL, no code. '''

    executor_options = {"handle_parsing_errors": True}
    agent_executor = create_sql_agent(
        llm=llm,
        toolkit=sql_toolkit,
        verbose=True,
        prefix=prefix,
        agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        memory=memory,
        agent_executor_kwargs=executor_options,
    )
    db_mode = "sql"
163
+
164
+ # --- INIT MONGO AGENT ---
165
# Prompt prefix that init_mongo_agent hands to the MongoDB agent.
# Only this short text is active; the longer instruction set that follows it
# in the file is commented out.
system_message = """
You are **MongoDBQueryBot**, a highly intelligent and accurate assistant for answering questions about data stored in a MongoDB database using tools.
"""
168
+
169
+ # ### 🚨 Critical Instructions (Strictly Follow These):
170
+ # - You **must always** use tools provided to answer user questions.
171
+ # - Always join IDs with associated human-readable values like names or titles when answering.
172
+ # - Prefer displaying `user name`, `employee name`, or `product name` instead of internal IDs like `user_id`, `emp_id`, or `product_id`.
173
+ # - Avoid responding only with technical identifiers. Make responses meaningful to users.
174
+ # - **Never** guess or fabricate any information.
175
+ # - **Do not** show raw JSON, field names, or database structure.
176
+ # - Your role is **read-only**: do not suggest or perform insert/update/delete.
177
+ # - After using all the available tools, if you are unable to find any documents, then give the following answer:
178
+ # "Please, rephrase your query because I can't exactly understand, what you want !"
179
+ # - If a query can't be answered or is unrelated to reading data, reply:
180
+ # ❌ "I'm only allowed to retrieve data. Please ask a query involving reading information."
181
+ # - IF USER ASK ABOUT DATA, Which is not there in a database, then GIVE FOLLOWING ANSWER:
182
+ # "There is no such data in the Database."
183
+ # - When returning answers:
184
+ # - Do **not return internal IDs** like `user_id`, `order_id`, `payment_id`, etc.
185
+ # - Instead, use human-readable fields like `name`, `full_name`, `user_name`, etc., from related collections.
186
+ # - If only an ID is available, try joining the relevant collections to fetch the proper display name.
187
+
188
+ # ### 🧠 How to Think:
189
+ # - Understand **exactly** what the user is trying to ask. Do not answer if unclear — ask for clarification.
190
+ # - Translate the user prompt into tool inputs by identifying:
191
+ # - Which collection to search
192
+ # - What value or field they're referring to
193
+ # - The correct format expected by the tool
194
+
195
+ # ### 🛠️ Tool Usage Guide:
196
+ # - Use `FindDocuments` for queries like:
197
+ # - "Show me all employees named John"
198
+ # - "What is the salary of Manager X?"
199
+ # - Use `ListCollections` to discover available data types (but don’t share them directly).
200
+ # - **IMPORTANT: Don't keep iterating on a single tool; if you can't answer with the tool you are currently using, switch to another tool!**
201
+ # - Use `JoinCollections` to resolve IDs into names when the question asks about people, customers, or products.
202
+ # - When resolving names from payments, use this format:
203
+ # `from=Payments, key=order_id, to=Orders, match=order_id, next_key=user_id, next_to=Users, next_match=user_id, return=name`
204
+
205
+ # - Your goal is to **return the person's name** (e.g., `name`, `user_name`, `full_name`) not their ID.
206
+ # - Always prioritize returning names instead of internal identifiers.
207
+ # - Examples:
208
+ # - For payment-related questions → Join Payments → Orders → Users and return name
209
+ # - For order questions → Join Orders → Users and return user names
210
+
211
+ # ### 🧾 Response Format:
212
+ # - Use **clear markdown with tables** when displaying data.
213
+ # - If no data is found: return `**No documents found.**`
214
+ # - Stay professional, brief, and relevant.
215
+
216
+ # ### 🚫 Never Do This:
217
+ # - Do not leak MongoDB structure, schema, or field names.
218
+ # - Do not suggest code, MongoDB syntax, or field mappings.
219
+ # - Do not hallucinate or make assumptions.
220
+
221
+ # Start by analyzing the prompt carefully, select the right tool, invoke it, and return a user-friendly answer based on the result.
222
+ # """
223
+
224
+
225
def find_docs_tool_func(query: str) -> str:
    """
    Search MongoDB for documents, trying the named collection first.

    If the named collection yields nothing, every other collection is tried
    in turn and the first non-empty result is returned.

    Input format (comma-separated ``name=value`` pairs):
    - collection=<collection>, key=<field>, value=<value>
    - OR: collection=<collection>, value=<value>

    Returns a table of the matching documents, a "no documents" message, or
    an error message string; exceptions are never raised to the caller.
    """
    try:
        fields = {}
        for chunk in query.split(","):
            if "=" not in chunk:
                continue
            name, _, raw = chunk.strip().partition("=")
            fields[name] = raw

        collection = fields.get("collection")
        key = fields.get("key")
        value = fields.get("value")
        if not collection:
            return "❌ 'collection' is required."

        def fetch(coll_name):
            target = mongo_db[coll_name]
            if key and value:
                # Exact match on one field.
                return list(target.find({key: value}, {'_id': 0}))
            if not value:
                # No filter at all: return the whole collection.
                return list(target.find({}, {'_id': 0}))
            # Value only: case-insensitive comparison against every field value.
            return [
                doc for doc in target.find({}, {'_id': 0})
                if any(str(v).lower() == value.lower() for v in doc.values())
            ]

        docs = fetch(collection)
        if not docs:
            for other in mongo_db.list_collection_names():
                if other != collection:
                    docs = fetch(other)
                    if docs:
                        break

        if not docs:
            return "**No documents found.**"
        return "\n markdown\n" + tabulate(docs, headers="keys", tablefmt="github") + "\n"
    except Exception as e:
        return f"Invalid input format or error: {str(e)}"
265
+
266
+
267
def aggregate_group_by(_input: str):
    """Count the documents of a collection grouped by one field.

    Accepted inputs:
    - A dict literal such as ``{"collection": "residents", "field": "gender"}``
      (``collection_name`` and ``group_by`` are accepted as alternative keys).
    - The legacy form ``collection=<name>, field=<group_by_field>``.
      Whitespace around names and values is ignored, and only the first ``=``
      of each pair separates the name from the value.

    Returns:
        A table with one row per distinct field value and its count, a
        "no data" message, or an ``"Aggregation failed: ..."`` string.
        Exceptions are never raised to the caller.
    """
    try:
        text = _input.strip()
        if text.startswith("{"):
            # Parse JSON-like string (literal_eval only evaluates literals).
            args = ast.literal_eval(text)
            collection = args.get("collection_name") or args.get("collection")
            field = args.get("group_by") or args.get("field")
        else:
            # Handle legacy input format; strip each side so that the usual
            # "a=b, c=d" spacing does not produce a key like " field".
            args = {}
            for part in text.split(","):
                name, sep, raw = part.partition("=")
                if sep:
                    args[name.strip()] = raw.strip()
            collection = args.get("collection")
            field = args.get("field")

        if not collection or not field:
            return "Aggregation failed: both 'collection' and 'field' are required."

        pipeline = [
            {"$group": {"_id": f"${field}", "count": {"$sum": 1}}},
            {"$project": {"_id": 0, field: "$_id", "count": 1}}
        ]
        result = list(mongo_db[collection].aggregate(pipeline))
        if not result:
            return "**No data found.**"
        return "\n markdown\n" + tabulate(result, headers="keys", tablefmt="github") + "\n"
    except Exception as e:
        return f"Aggregation failed: {e}"
290
+
291
+
292
def get_all_documents(collection: str):
    """Return every document of *collection* (without ``_id``) as a table.

    Yields a "no documents" message for an empty result and an error message
    string if anything fails; exceptions are never raised to the caller.
    """
    try:
        rows = list(mongo_db[collection].find({}, {'_id': 0}))
        if rows:
            table = tabulate(rows, headers="keys", tablefmt="github")
            return "".join(("\n markdown\n", table, "\n"))
        return "**No documents found.**"
    except Exception as e:
        return f"Error fetching documents: {e}"
300
+
301
+
302
def fuzzy_find_documents(query: str):
    """Return documents with at least one field value fuzzily matching a search term.

    Input: ``collection=<name>, value=<search_term>, threshold=<int>``; the
    threshold is optional and defaults to 80.  A document matches when
    ``fuzz.partial_ratio`` of any lower-cased field value against the
    lower-cased term reaches the threshold.

    Returns a table, a "no fuzzy matches" message, or an error message
    string; exceptions are never raised to the caller.
    """
    try:
        pairs = [piece.strip().split("=", 1) for piece in query.split(",")]
        options = dict(pairs)
        collection = options["collection"]
        needle = options["value"].lower()
        threshold = int(options.get("threshold", 80))

        hits = [
            doc
            for doc in mongo_db[collection].find({}, {'_id': 0})
            if any(fuzz.partial_ratio(str(v).lower(), needle) >= threshold for v in doc.values())
        ]
        if hits:
            return "\n markdown\n" + tabulate(hits, headers="keys", tablefmt="github") + "\n"
        return "**No fuzzy matches found.**"
    except Exception as e:
        return f"Fuzzy match error: {e}"
318
+
319
+ # def join_collections_tool_func(_input: str):
320
+ # try:
321
+ # # Parse input like: from=Products, key=category_id, to=Categories, match=category_id, return=category_name
322
+ # args = dict(x.strip().split("=", 1) for x in _input.split(","))
323
+ # from_collection = args["from"]
324
+ # foreign_key = args["key"]
325
+ # to_collection = args["to"]
326
+ # match_key = args["match"]
327
+ # return_field = args["return"]
328
+
329
+ # results = []
330
+ # foreign_lookup = {
331
+ # doc[match_key]: doc.get(return_field)
332
+ # for doc in mongo_db[to_collection].find()
333
+ # if match_key in doc
334
+ # }
335
+
336
+ # for doc in mongo_db[from_collection].find({}, {'_id': 0}):
337
+ # doc[return_field] = foreign_lookup.get(doc.get(foreign_key), "Unknown")
338
+ # results.append(doc)
339
+
340
+ # if not results:
341
+ # return "**No documents found.**"
342
+
343
+ # return "\n markdown\n" + tabulate(results, headers="keys", tablefmt="github") + "\n"
344
+
345
+ # except Exception as e:
346
+ # return f"Join failed: {e}"
347
+
348
+
349
def join_collections_tool_func(_input: str):
    """
    Join collections and return one column of the joined rows as a table.

    Supports a 2-level join (Payments → Orders → Users) or any pair-wise join.
    Input formats (comma-separated ``name=value`` pairs):
    - from=Payments, key=order_id, to=Orders, match=order_id, next_key=user_id, next_to=Users, next_match=user_id, return=name
    - from=Products, key=category_id, to=Categories, match=category_id, return=category_name

    ``from``, ``key``, ``to``, ``match`` and ``return`` are required; the
    ``next_*`` trio enables the second join only when all three are given.

    Returns:
        A table containing only the ``return`` field, the no-documents
        message when nothing can be shown, or a ``"Join failed: ..."``
        string.  Exceptions are never raised to the caller.
    """
    try:
        args = dict(x.strip().split("=", 1) for x in _input.split(","))
        from_coll = args["from"]
        key = args["key"]
        to_coll = args["to"]
        match = args["match"]
        return_field = args["return"]

        next_key = args.get("next_key")
        next_to = args.get("next_to")
        next_match = args.get("next_match")

        # First join (e.g., Payments → Orders): index the target collection
        # by its match field, then merge each source row with its partner.
        to_docs = {doc[match]: doc for doc in mongo_db[to_coll].find()
                   if match in doc}
        joined = []
        for doc in mongo_db[from_coll].find({}, {'_id': 0}):
            foreign_doc = to_docs.get(doc.get(key))
            if not foreign_doc:
                continue  # rows without a partner are dropped
            # On a name clash the joined-to document's value wins.
            joined.append({**doc, **foreign_doc})

        # Second join (e.g., Orders → Users)
        if next_key and next_to and next_match:
            next_docs = {
                doc[next_match]: doc for doc in mongo_db[next_to].find() if next_match in doc}
            for doc in joined:
                user_doc = next_docs.get(doc.get(next_key))
                if user_doc:
                    doc[return_field] = user_doc.get(return_field, "Unknown")
                else:
                    doc[return_field] = "Unknown"

        # Prepare final result: keep only the requested column.
        final = [{return_field: doc.get(return_field)}
                 for doc in joined if return_field in doc]
        # Checking `final` (not `joined`) also covers the case where rows
        # were joined but none of them carries the requested field, which
        # would otherwise render as an empty table.
        if not final:
            return "**No documents found.**"
        return "\n markdown\n" + tabulate(final, headers="keys", tablefmt="github") + "\n"

    except Exception as e:
        return f"Join failed: {e}"
399
+
400
+
401
def smart_join_router(prompt: str) -> str:
    """
    Suggest a ready-made JoinCollections input string for a user prompt.

    Two keyword-based routes exist (checked case-insensitively, in order):
    payment + a person-like word → Payments → Orders → Users, and
    order + "name" → Orders → Users.  Anything else gets a fallback message.
    """
    text = prompt.lower()
    mentions_person = any(word in text for word in ("who", "name", "user", "person"))

    if "payment" in text and mentions_person:
        return "from=Payments, key=order_id, to=Orders, match=order_id, next_key=user_id, next_to=Users, next_match=user_id, return=name"
    if "order" in text and "name" in text:
        return "from=Orders, key=user_id, to=Users, match=user_id, return=name"
    # Extend with further routes as needed
    return "Unable to auto-generate join path. Please provide more context."
413
+
414
+
415
def init_mongo_agent(json_path):
    """Load a JSON export into MongoDB and make a MongoDB agent the active agent.

    The file is parsed with ``bson.json_util``.  A top-level list is stored
    in ``default_collection``; a top-level dict is treated as
    ``{collection_name: documents}``.  Each target collection is dropped
    before it is refilled.

    Rebinds the module globals ``client``, ``mongo_db``, ``agent_executor``
    and ``db_mode`` (to ``"mongo"``).

    Args:
        json_path: Path of the uploaded JSON file.

    Raises:
        ValueError: If the top-level JSON value is neither a list nor a dict.
    """
    global agent_executor, client, mongo_db, db_mode

    # Release the connection left over from a previous upload.
    if client is not None:
        client.close()
    client = MongoClient("mongodb://localhost:27017/")
    mongo_db = client['uploaded_mongo']
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json_util.loads(f.read())

    # Handle both single-collection and multi-collection formats.
    # insert_many rejects an empty list, so empty inputs just leave the
    # (freshly dropped) collection empty.
    if isinstance(data, list):
        # Default collection name if only a list is provided
        collection = mongo_db['default_collection']
        collection.drop()
        if data:
            collection.insert_many(data)
    elif isinstance(data, dict):
        for col_name, docs in data.items():
            collection = mongo_db[col_name]
            collection.drop()
            if isinstance(docs, list):
                if docs:
                    collection.insert_many(docs)
            else:
                collection.insert_one(docs)
    else:
        raise ValueError("Unsupported JSON format. Must be a list or dict.")

    def list_collections(_input=None):
        # The tool framework always passes one argument; it is ignored here.
        return mongo_db.list_collection_names()

    find_docs_tool = Tool(
        name="FindDocuments",
        description=(
            "Use this tool to find documents in a MongoDB collection.\n"
            "Input format:\n"
            "- `collection=<collection>, key=<field>, value=<value>` for precise queries\n"
            "- OR `collection=<collection>, value=<value>` to search across all fields\n"
            "If `key` is omitted, the tool will automatically scan all fields to find matching values.\n"
            "Examples:\n"
            "- `collection=default_collection, key=name, value=Lauren Alexander`\n"
            "- `collection=default_collection, value=Lauren Alexander`"
        ),
        func=find_docs_tool_func)

    list_collections_tool = Tool(
        name="ListCollections",
        func=list_collections,
        description="List all collections..."
    )

    aggregate_tool = Tool(
        name="AggregateGroupBy",
        func=aggregate_group_by,
        description=(
            "Group documents and count by any field. Format: collection=<name>, field=<group_by_field>. E.g., collection=residents, field=gender"
        )
    )
    get_all_documents_tool = Tool(
        name="GetAllDocuments",
        func=get_all_documents,
        description=(
            "Fetch all documents from a collection. Input: collection name only. Example: residents"
        )
    )

    fuzzy_tool = Tool(
        name="FuzzyFindDocuments",
        func=fuzzy_find_documents,
        description=("Fuzzy match documents across all fields in a collection. Format: collection=<name>, value=<search_term>, threshold=80 (optional)"
                     )
    )

    join_collection_tool = Tool(
        name="JoinCollections",
        func=join_collections_tool_func,
        description=(
            "Join collections to map foreign keys to human-readable values. Supports 1 or 2-level joins.\n"
            "Formats:\n"
            "- from=Payments, key=order_id, to=Orders, match=order_id, return=status\n"
            "- from=Payments, key=order_id, to=Orders, match=order_id, next_key=user_id, next_to=Users, next_match=user_id, return=name"
        )
    )
    smart_router_tool = Tool(
        name="SmartJoinRouter",
        func=smart_join_router,
        description=(
            "Suggest the correct JoinCollections input format based on user intent.\n"
            "Use this when you are unsure how to form the join input."
        )
    )

    # Register the tools themselves.  Wrapping each Tool in a second Tool
    # would hide the input-format descriptions above from the agent and
    # depend on Tool objects being callable.
    tools = [
        find_docs_tool,
        list_collections_tool,
        aggregate_tool,
        get_all_documents_tool,
        fuzzy_tool,
        join_collection_tool,
        smart_router_tool,
    ]

    # initialize_agent selects the agent via `agent=` and forwards prompt
    # options through `agent_kwargs`; `agent_type=` / `prefix=` are not
    # parameters of this function.
    agent_executor = initialize_agent(
        tools=tools,
        llm=llm,
        agent=AgentType.CONVERSATIONAL_REACT_DESCRIPTION,
        agent_kwargs={"prefix": system_message},
        memory=memory,
        verbose=True,
        handle_parsing_errors=True
    )
    db_mode = "mongo"
525
+
526
+
527
@app.errorhandler(Exception)
def handle_all_errors(e):
    """Log any exception and answer with a JSON error body.

    HTTP exceptions keep their own description and status code; everything
    else is reported as a generic 500.
    """
    print(f"[ERROR] Global handler caught an exception: {str(e)}")
    traceback.print_exc()

    if not isinstance(e, HTTPException):
        generic = {"status": "error", "message": "An unexpected error occurred"}
        return jsonify(generic), 500

    return jsonify({"status": "error", "message": e.description}), e.code
536
+
537
+
538
@app.errorhandler(TooManyRequests)
def handle_429_error(e):
    """Answer ``TooManyRequests`` with a fixed JSON "busy" message and status 429."""
    busy_payload = {
        "status": "error",
        "message": "🚦 Agent is busy, try again after sometime."
    }
    return jsonify(busy_payload), 429
544
+
545
+ # --- ROUTES ---
546
+
547
+
548
@app.route("/")
def index():
    """Serve the landing page by rendering the ``app_index.html`` template."""
    return render_template("app_index.html")
551
+
552
+
553
@app.route("/upload_db", methods=["POST"])
@error_safe
def upload_db():
    """Accept an uploaded database file and initialise the matching agent.

    Expects a multipart form field named ``file``.  ``.json`` uploads are
    loaded through ``init_mongo_agent``; ``.db`` / ``.sqlite`` uploads go to
    ``init_sql_agent``.  Extensions are compared case-insensitively.

    Returns:
        200 with ``database_name`` and ``message`` on success;
        400 when no file is sent, the name is unusable, or the extension is
        unsupported; 500 when initialisation fails.
    """
    file = request.files.get("file")
    if not file or file.filename == "":
        return jsonify(success=False, message="No file provided"), 400

    filename = secure_filename(file.filename)
    if not filename:
        # secure_filename can reduce a name such as "../" to an empty string;
        # saving to the bare upload directory would then fail.
        return jsonify(success=False, message="Invalid file name"), 400

    # Decide by extension before writing, so unsupported uploads never reach disk.
    base_name, extension = os.path.splitext(filename)
    extension = extension.lower()
    if extension not in (".json", ".db", ".sqlite"):
        return jsonify(success=False, message="Unsupported file format"), 400

    path = os.path.join(app.config["UPLOAD_FOLDER"], filename)
    file.save(path)

    try:
        if extension == ".json":
            init_mongo_agent(path)
            # init_mongo_agent rebinds the module-level mongo_db; use its
            # name when available, else fall back to the file's base name.
            db_name = getattr(mongo_db, "name", None) or base_name
            return jsonify({"database_name": db_name, "message": "MongoDB initialized"}), 200

        # SQL DB (.db or .sqlite)
        init_sql_agent(path)
        return jsonify({"database_name": base_name, "message": "SQL DB initialized"}), 200
    except Exception as e:
        traceback.print_exc()
        return jsonify(success=False, message=f"Init failed: {e}"), 500
584
+
585
@app.route("/generate", methods=["POST"])
@error_safe
def generate():
    """Run the active agent on the posted prompt and return its answer as JSON.

    Expects a JSON body with a non-empty string ``prompt``.

    Returns:
        200 with ``final_answer`` and ``prompt`` on success;
        400 when the body is invalid, the prompt is empty, or no database
        has been uploaded yet; 500 when the agent call fails.
    """
    try:
        data = request.get_json(force=True) or {}
        prompt = data.get("prompt", "").strip()
        if not prompt:
            return jsonify({"status": "error", "message": "Prompt is required"}), 400
    except Exception:
        traceback.print_exc()
        return jsonify({"status": "error", "message": "Invalid input"}), 400

    # agent_executor stays None until a database has been uploaded; report
    # that as a client error instead of failing inside the agent call.
    if agent_executor is None:
        return jsonify({
            "status": "error",
            "prompt": prompt,
            "final_answer": "",
            "message": "No database loaded. Please upload a database first.",
        }), 400

    try:
        # invoke the agent synchronously
        result = agent_executor.invoke({"input": prompt})

        # Normalize final_answer from agent output safely: take the first
        # non-empty value among the known result keys.
        if isinstance(result, dict):
            final_answer = (
                result.get("final_answer")
                or result.get("output")
                or result.get("answer")
                or result.get("text")
                or ""
            )
        else:
            final_answer = str(result or "")

        # Also emit over Socket.IO so listening clients still get the answer;
        # a failed emit must not fail the HTTP response.
        try:
            socketio.emit("final", {"message": final_answer})
        except Exception:
            app.logger.debug("socket emit failed, continuing")

        return jsonify({"final_answer": final_answer, "prompt": prompt}), 200

    except Exception as e:
        app.logger.exception("Agent invocation failed")
        return jsonify({"prompt": prompt, "final_answer": "", "message": f"Agent error: {str(e)[:200]}"}), 500
627
+
628
+
629
# Start the app through Socket.IO's runner when executed as a script.
# NOTE(review): debug=True is hard-coded here — confirm this entry point is
# never used for a publicly reachable deployment.
if __name__ == "__main__":
    socketio.run(app, debug=True)