Spaces:

prthm11
/

Database_Agent

Runtime error

App Files Files Community

prthm11 commited on Aug 26, 2025

Commit

c94b2e1

verified ·

1 Parent(s): f47702d

Create app.py

Browse files

Files changed (1) hide show

app.py +558 -0

app.py ADDED Viewed

	@@ -0,0 +1,558 @@

+from flask import Flask, request, jsonify, render_template
+from flask_socketio import SocketIO, emit
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain.agents import initialize_agent, AgentType
+from langchain_community.agent_toolkits import create_sql_agent, SQLDatabaseToolkit
+from langchain_community.utilities import SQLDatabase
+from langchain.tools import Tool
+from langchain.memory import ConversationBufferMemory
+from pymongo import MongoClient
+import threading
+import os, re, traceback, ast
+from bson import json_util
+from dotenv import load_dotenv
+from werkzeug.exceptions import HTTPException
+from langchain.prompts import ChatPromptTemplate
+from tabulate import tabulate
+from fuzzywuzzy import fuzz
+import urllib
+import logging
+from urllib.parse import urlparse
+from langchain_groq import ChatGroq
+# --------------------------
+# BASIC CONFIG
+# --------------------------
+load_dotenv()
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Static SQL ODBC connection (hard-coded as requested)
+ODBC_CONN = (
+    "DRIVER={ODBC Driver 17 for SQL Server};"
+    f"SERVER={os.getenv('DB_SERVER','192.168.1.37')},"
+    f"{os.getenv('DB_PORT','1433')};"
+    f"DATABASE={os.getenv('DB_NAME','TunisSyncV1')};"
+    f"UID={os.getenv('DB_USER','sa')};"
+    f"PWD={os.getenv('DB_PASS','sa123')}"
+)
+# params = urllib.parse.quote_plus(ODBC_CONN)
+# DB_URI = f"mssql+pyodbc:///?odbc_connect={params}"
+# mssql+pyodbc:///?odbc_connect=DRIVER%3D%7BODBC+Driver+17+for+SQL+Server%7D%3BSERVER%3D192.168.1.37%2C1433%3BDATABASE%3DTunisSyncV1%3BUID%3Dsa%3BPWD%3Dsa123
+# # Static MongoDB URI (Atlas)
+# # MONGO_URI = os.getenv('MONGO_URI', 'mongodb+srv://dixitmwa:DixitWa%40123!@cluster0.qiysaz9.mongodb.net/shopdb')
+# MONGO_URI = 'mongodb+srv://dixitmwa:DixitWa%40123!@cluster0.qiysaz9.mongodb.net/shopdb'
+# --------------------------
+# Flask + SocketIO + LLM
+# --------------------------
+app = Flask(__name__)
+app.config['SECRET_KEY'] = os.urandom(32)
+app.config['UPLOAD_FOLDER'] = 'uploads'
+os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
+socketio = SocketIO(app, cors_allowed_origins='*')
+llm = ChatGoogleGenerativeAI(
+    temperature=0.2,
+    model="gemini-2.0-flash",
+    max_retries=50,
+    api_key=os.getenv('GEMINI_API_KEY')
+)
+# llm = ChatGroq(
+#     # model="meta-llama/llama-4-scout-17b-16e-instruct",
+#     # model="deepseek-r1-distill-llama-70b",
+#     model= "meta-llama/llama-4-maverick-17b-128e-instruct",
+#     # model="openai/gpt-oss-120b",
+#     temperature=0,
+#     max_tokens=None,
+#     max_retries=50,
+#     api_key=os.getenv('GROQ_API_KEY')
+# )
+# Globals
+agent_executor = None
+memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True, input_key='input')
+mongo_client = None
+mongo_db = None
+db_mode = None  # 'sql' or 'mongo'
+# --------------------------
+# Helpers / Safety checks
+# --------------------------
+def error_safe(f):
+    def wrapper(*args, **kwargs):
+        try:
+            return f(*args, **kwargs)
+        except HTTPException as he:
+            return jsonify({"status": "error", "message": he.description}), he.code
+        except Exception as e:
+            print('[ERROR] Uncaught Exception in', f.__name__)
+            traceback.print_exc()
+            return jsonify({"status": "error", "message": str(e)}), 500
+    wrapper.__name__ = f.__name__
+    return wrapper
+# def is_schema_request(prompt: str) -> bool:
+#     pattern = re.compile(r'\b(schema|table names|tables|columns|structure|column names|collections?|field names|metadata|describe|show)\b', re.IGNORECASE)
+#     return bool(pattern.search(prompt))
+# def is_sensitive_request(prompt: str) -> bool:
+#     sensitive_keywords = [
+#         "password", "token", "credential", "secret", "api key", "schema", "structure",
+#         "collection name", "field name", "user_id", "order_id", "payment_id",
+#         "internal", "database structure", "table structure", "email", "phone", "contact", "ssn"
+#     ]
+#     lowered = prompt.lower()
+#     return any(keyword in lowered for keyword in sensitive_keywords)
+# intent_prompt = ChatPromptTemplate.from_messages([
+#     ("system", "Classify if the user is asking schema/structure/sensitive info (tables, columns, schema): YES or NO."),
+#     ("human", "{prompt}")
+# ])
+# try:
+#     intent_checker = intent_prompt | llm
+# except Exception:
+#     intent_checker = None
+# def is_schema_leak_request(prompt):
+#     if intent_checker is None:
+#         return False
+#     try:
+#         classification = intent_checker.invoke({"prompt": prompt})
+#         text = ''
+#         if hasattr(classification, 'content'):
+#             text = classification.content
+#         elif hasattr(classification, 'text'):
+#             text = classification.text
+#         else:
+#             text = str(classification)
+#         return 'yes' in text.strip().lower()
+#     except Exception as e:
+#         logger.warning('Schema intent classifier failed: %s', e)
+#         return False
+# --------------------------
+# SQL agent initialization
+# --------------------------
+def init_sql_agent_from_uri(sql_uri: str):
+    global agent_executor, db_mode
+    try:
+        # # Detect dialect from URI prefix
+        # if sql_uri.startswith("postgresql://"):
+        #     dialect = "PostgreSQL"
+        # elif sql_uri.startswith("mysql://") or sql_uri.startswith("mysql+pymysql://"):
+        #     dialect = "MySQL"
+        # elif sql_uri.startswith("sqlite:///") or sql_uri.startswith("sqlite://"):
+        #     dialect = "SQLite"
+        # else:
+        #     dialect = "Generic SQL"
+        sql_db = SQLDatabase.from_uri(sql_uri)
+        toolkit = SQLDatabaseToolkit(db=sql_db, llm=llm)
+        prefix = '''You are a helpful SQL expert agent that ALWAYS returns natural language answers using the tools.
+                Always format your responses in Markdown. For example:
+                - Use bullet points
+                - Use bold for headers
+                - Wrap code in triple backticks
+                - Tables should use Markdown table syntax
+                You must NEVER:
+                - Show or mention SQL syntax.
+                - Reveal table names, column names, or database schema.
+                - Respond with any technical details or structure of the database.
+                - Return code or tool names.
+                - Give wrong Answers.
+                '''
+        agent = create_sql_agent(
+            llm=llm,
+            toolkit=toolkit,
+            verbose=True,
+            prefix=prefix,
+            agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
+            memory=memory,
+            agent_executor_kwargs={"handle_parsing_errors": True},
+        )
+        agent_executor = agent
+        db_mode = 'sql'
+        logger.info('SQL agent initialized using URI')
+    except Exception as e:
+        logger.error('Failed to initialize SQL agent: %s', e)
+        traceback.print_exc()
+# --------------------------
+# Mongo agent initialization
+# --------------------------
+def find_docs_tool_func(query: str) -> str:
+    try:
+        parts = dict(part.strip().split('=', 1) for part in query.split(',') if '=' in part)
+        collection = parts.get('collection')
+        key = parts.get('key')
+        value = parts.get('value')
+        if not collection:
+            return "❌ 'collection' is required."
+        def query_collection(coll_name):
+            if key and value:
+                return list(mongo_db[coll_name].find({key: value}, {'_id': 0}))
+            elif value:
+                return [doc for doc in mongo_db[coll_name].find({}, {'_id': 0}) if any(str(v).lower() == value.lower() for v in doc.values())]
+            else:
+                return list(mongo_db[coll_name].find({}, {'_id': 0}))
+        docs = query_collection(collection)
+        if docs:
+            return "\n markdown\n" + tabulate(docs, headers='keys', tablefmt='github') + "\n"
+        for coll in mongo_db.list_collection_names():
+            if coll == collection:
+                continue
+            docs = query_collection(coll)
+            if docs:
+                return "\n markdown\n" + tabulate(docs, headers='keys', tablefmt='github') + "\n"
+        return "**No documents found.**"
+    except Exception as e:
+        return f"Invalid input format or error: {str(e)}"
+def aggregate_group_by(_input: str):
+    try:
+        if _input.strip().startswith('{'):
+            args = ast.literal_eval(_input)
+            collection = args.get('collection_name') or args.get('collection')
+            field = args.get('group_by') or args.get('field')
+        else:
+            args = dict(x.split('=') for x in _input.split(','))
+            collection = args['collection']
+            field = args['field']
+        pipeline = [
+            {'$group': {'_id': f"${field}", 'count': {'$sum': 1}}},
+            {'$project': {'_id': 0, field: '$_id', 'count': 1}}
+        ]
+        result = list(mongo_db[collection].aggregate(pipeline))
+        if not result:
+            return "**No data found.**"
+        return "\n markdown\n" + tabulate(result, headers='keys', tablefmt='github') + "\n"
+    except Exception as e:
+        return f"Aggregation failed: {e}"
+def get_all_documents(collection: str):
+    try:
+        docs = list(mongo_db[collection].find({}, {'_id': 0}))
+        if not docs:
+            return "**No documents found.**"
+        return "\n markdown\n" + tabulate(docs, headers='keys', tablefmt='github') + "\n"
+    except Exception as e:
+        return f"Error fetching documents: {e}"
+def fuzzy_find_documents(query: str):
+    try:
+        parts = dict(part.strip().split('=', 1) for part in query.split(','))
+        collection = parts['collection']
+        value = parts['value']
+        threshold = int(parts.get('threshold', 80))
+        matches = []
+        for doc in mongo_db[collection].find({}, {'_id': 0}):
+            if any(fuzz.partial_ratio(str(v).lower(), value.lower()) >= threshold for v in doc.values()):
+                matches.append(doc)
+        if not matches:
+            return "**No fuzzy matches found.**"
+        return "\n markdown\n" + tabulate(matches, headers='keys', tablefmt='github') + "\n"
+    except Exception as e:
+        return f"Fuzzy match error: {e}"
+def join_collections_tool_func(_input: str):
+    try:
+        args = dict(x.strip().split('=', 1) for x in _input.split(','))
+        from_coll = args['from']
+        key = args['key']
+        to_coll = args['to']
+        match = args['match']
+        return_field = args['return']
+        next_key = args.get('next_key')
+        next_to = args.get('next_to')
+        next_match = args.get('next_match')
+        to_docs = {doc[match]: doc for doc in mongo_db[to_coll].find() if match in doc}
+        joined = []
+        for doc in mongo_db[from_coll].find({}, {'_id': 0}):
+            foreign_doc = to_docs.get(doc.get(key))
+            if not foreign_doc:
+                continue
+            merged = {**doc, **foreign_doc}
+            joined.append(merged)
+        if next_key and next_to and next_match:
+            next_docs = {doc[next_match]: doc for doc in mongo_db[next_to].find() if next_match in doc}
+            for doc in joined:
+                user_doc = next_docs.get(doc.get(next_key))
+                if user_doc:
+                    doc[return_field] = user_doc.get(return_field, 'Unknown')
+                else:
+                    doc[return_field] = 'Unknown'
+        if not joined:
+            return "**No documents found.**"
+        final = [{return_field: doc.get(return_field)} for doc in joined if return_field in doc]
+        return "\n markdown\n" + tabulate(final, headers='keys', tablefmt='github') + "\n"
+    except Exception as e:
+        return f"Join failed: {e}"
+def smart_join_router(prompt: str) -> str:
+    prompt_lower = prompt.lower()
+    if 'payment' in prompt_lower and any(term in prompt_lower for term in ['who', 'name', 'user', 'person']):
+        return 'from=Payments, key=order_id, to=Orders, match=order_id, next_key=user_id, next_to=Users, next_match=user_id, return=name'
+    elif 'order' in prompt_lower and 'name' in prompt_lower:
+        return 'from=Orders, key=user_id, to=Users, match=user_id, return=name'
+    return 'Unable to auto-generate join path. Please provide more context.'
+def init_mongo_agent_from_uri(mongo_uri: str, database_name: str = None):
+    """Initialize global mongo_client, mongo_db and build the LangChain agent tools for MongoDB access."""
+    global mongo_client, mongo_db, agent_executor, db_mode
+    try:
+        mongo_client = MongoClient(mongo_uri, serverSelectionTimeoutMS=5000)
+        # Trigger a ping to validate connection
+        mongo_client.admin.command('ping')
+        # Try to get DB name from URI if not provided
+        if database_name is None:
+            parsed = urlparse(mongo_uri)
+            path = parsed.path.lstrip('/')
+            if path:
+                database_name = path
+            else:
+                database_name = "test"   # fallback if URI has no db
+        mongo_db = mongo_client[database_name]
+        logger.info('Connected to MongoDB Atlas database: %s', database_name)
+        tools = [
+            Tool(name='FindDocuments', func=find_docs_tool_func, description='Flexible MongoDB search...'),
+            Tool(name='ListCollections', func=lambda x: mongo_db.list_collection_names(), description='List all collections...'),
+            Tool(name='AggregateGroupBy', func=aggregate_group_by, description='Group and count by any field...'),
+            Tool(name='GetAllDocuments', func=get_all_documents, description='Fetch all documents from a collection...'),
+            Tool(name='FuzzyFindDocuments', func=fuzzy_find_documents, description='Fuzzy match documents across all fields...'),
+            Tool(name='JoinCollections', func=join_collections_tool_func, description='Join related collections to return names instead of IDs...'),
+            Tool(name='SmartJoinCollections', func=smart_join_router, description='Suggest join formats...')
+        ]
+        agent = initialize_agent(
+            tools=tools,
+            llm=llm,
+            agent_type=AgentType.CONVERSATIONAL_REACT_DESCRIPTION,
+            memory=memory,
+            verbose=True,
+            prefix="You are MongoDBQueryBot. Use tools to fetch read-only data and do not leak schema.",
+            handle_parsing_errors=True
+        )
+        agent_executor = agent
+        db_mode = 'mongo'
+        logger.info('Mongo agent initialized')
+    except Exception as e:
+        logger.error('Failed to initialize Mongo agent: %s', e)
+        traceback.print_exc()
+# --------------------------
+# Routes
+# --------------------------
+@app.route('/')
+def index():
+    return render_template('index_db_json.html')
+# Upload endpoint intentionally disabled (dynamic upload removed)
+@app.route('/upload_db', methods=['POST'])
+@error_safe
+def upload_db():
+    return jsonify({'success': False, 'message': 'Dynamic DB upload is disabled. This server uses static configured DB URIs.'}), 403
+@app.route('/connect_db', methods=['POST'])
+@error_safe
+def connect_db():
+    global agent_executor, db_mode
+    data = request.get_json(force=True)
+    uri = data.get('uri', '').strip()
+    if not uri:
+        return jsonify({"success": False, "message": "❌ No connection string provided."}), 400
+    try:
+        # --- MongoDB ---
+        if uri.startswith("mongodb://") or uri.startswith("mongodb+srv://"):
+            init_mongo_agent_from_uri(uri)
+            return jsonify({"success": True, "message": "✅ Connected to MongoDB agent."})
+        # --- PostgreSQL ---
+        elif uri.startswith("postgresql://"):
+            init_sql_agent_from_uri(uri, dialect="postgresql")
+            return jsonify({"success": True, "message": "✅ Connected to PostgreSQL agent."})
+        # --- MySQL ---
+        elif uri.startswith("mysql://") or uri.startswith("mysql+pymysql://"):
+            init_sql_agent_from_uri(uri, dialect="mysql")
+            return jsonify({"success": True, "message": "✅ Connected to MySQL agent."})
+        # --- SQLite ---
+        elif uri.startswith("sqlite:///") or uri.startswith("sqlite://"):
+            init_sql_agent_from_uri(uri, dialect="sqlite")
+            return jsonify({"success": True, "message": "✅ Connected to SQLite agent."})
+        # --- SQL Server ---
+        else:
+            init_sql_agent_from_uri(uri)
+            return jsonify({"success": True, "message": "✅ Connected to SQL agent."})
+    except Exception as e:
+        logger.error("Failed to connect DB: %s", e)
+        return jsonify({"success": False, "message": f"❌ Connection failed: {e}"}), 500
+# @app.route('/generate', methods=['POST'])
+# @error_safe
+# def generate():
+#     try:
+#         data = request.get_json(force=True)
+#         prompt = data.get('prompt', '').strip()
+#         if not prompt:
+#             return jsonify({'status': 'error', 'message': 'Prompt is required'}), 400
+#         # if is_schema_leak_request(prompt) or is_schema_request(prompt):
+#         #     msg = '⛔ Sorry, you\'re not allowed to access structure/schema information.'
+#         #     socketio.emit('final', {'message': msg})
+#         #     return jsonify({'status': 'blocked', 'message': msg}), 403
+#         # if is_sensitive_request(prompt):
+#         #     msg = '⛔ This query may involve sensitive or protected information. Please rephrase your question.'
+#         #     socketio.emit('final', {'message': msg})
+#         #     return jsonify({'status': 'blocked', 'message': msg}), 403
+#     except Exception as e:
+#         traceback.print_exc()
+#         return jsonify({'status': 'error', 'message': 'Invalid input'}), 400
+#     def run_agent():
+#         try:
+#             result = agent_executor.invoke({'input': prompt})
+#             final_answer = result.get('output', '')
+#             if not final_answer.strip():
+#                 final_answer = "Please, rephrase your query because I can't exactly understand, what you want !"
+#             socketio.emit('final', {'message': final_answer})
+#         except Exception as e:
+#             error_message = str(e)
+#             if '429' in error_message and 'quota' in error_message.lower():
+#                 user_friendly_msg = '🚦 Agent is busy, try again after sometime.'
+#             else:
+#                 user_friendly_msg = f'Agent failed: {error_message}'
+#             socketio.emit('final', {'message': user_friendly_msg})
+#             traceback.print_exc()
+#     threading.Thread(target=run_agent).start()
+#     return jsonify({'status': 'ok'}), 200
+@app.route('/generate', methods=['POST'])
+@error_safe
+def generate():
+    try:
+        data = request.get_json(force=True)
+        prompt = data.get('prompt', '').strip()
+        if not prompt:
+            return jsonify({'status': 'error', 'message': 'Prompt is required'}), 400
+        # Optional safety checks (commented out in your snippet)
+        # if is_schema_leak_request(prompt) or is_schema_request(prompt):
+        #     msg = "⛔ Sorry, you're not allowed to access structure/schema information."
+        #     socketio.emit('final', {'message': msg})
+        #     return jsonify({'status': 'blocked', 'message': msg}), 403
+        #
+        # if is_sensitive_request(prompt):
+        #     msg = "⛔ This query may involve sensitive or protected information. Please rephrase your question."
+        #     socketio.emit('final', {'message': msg})
+        #     return jsonify({'status': 'blocked', 'message': msg}), 403
+    except Exception:
+        traceback.print_exc()
+        return jsonify({'status': 'error', 'message': 'Invalid input'}), 400
+    try:
+        # Run the agent synchronously and normalize the response
+        result = agent_executor.invoke({'input': prompt})
+        if isinstance(result, dict):
+            final_answer = (
+                result.get('final_answer')
+                or result.get('final')
+                or result.get('output')
+                or result.get('answer')
+                or result.get('text')
+                or ""
+            )
+        else:
+            final_answer = str(result or "")
+        if final_answer is None:
+            final_answer = ""
+        final_answer = final_answer.strip()
+        if not final_answer:
+            final_answer = "Please, rephrase your query because I can't exactly understand, what you want !"
+        # Emit via socketio (best-effort)
+        try:
+            socketio.emit('final', {'message': final_answer})
+        except Exception:
+            app.logger.debug("socket emit failed, continuing")
+        # Return final_answer in the HTTP response
+        return jsonify({'status': 'ok', 'prompt': prompt, 'final_answer': final_answer}), 200
+    except Exception as e:
+        app.logger.exception("Agent invocation failed")
+        # Friendly message for certain common failures (example: quota/429)
+        err_text = str(e)
+        if '429' in err_text and 'quota' in err_text.lower():
+            user_msg = '🚦 Agent is busy, try again after sometime.'
+        else:
+            user_msg = f'Agent error: {err_text[:500]}'
+        # Still emit to clients so UIs listening get notified
+        try:
+            socketio.emit('final', {'message': user_msg})
+        except Exception:
+            app.logger.debug("socket emit failed during error handling")
+        return jsonify({'status': 'error', 'prompt': prompt, 'final_answer': '', 'message': user_msg}), 500
+# --------------------------
+# Error handlers
+# --------------------------
+@app.errorhandler(Exception)
+def handle_all_errors(e):
+    print(f"[ERROR] Global handler caught an exception: {str(e)}")
+    traceback.print_exc()
+    return jsonify({'status': 'error', 'message': 'An unexpected error occurred'}), 500
+from werkzeug.exceptions import TooManyRequests
+@app.errorhandler(TooManyRequests)
+def handle_429_error(e):
+    return jsonify({
+        'status': 'error',
+        'message': '🚦 Agent is busy, try again after sometime.'
+    }), 429
+# --------------------------
+# Startup: initialize both agents using static URIs
+# --------------------------
+if __name__ == '__main__':
+    # Initialize SQL agent (static)
+    # init_sql_agent_from_uri(DB_URI)
+    # # Initialize Mongo agent (static)
+    # init_mongo_agent_from_uri(MONGO_URI, database_name='shopdb')
+    socketio.run(app, host='0.0.0.0', port=5000, debug=True)