# --- IMPORTS ---
from werkzeug.exceptions import TooManyRequests
from flask import Flask, request, jsonify, render_template
from flask_socketio import SocketIO, emit
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.agents import initialize_agent, AgentType, create_react_agent, AgentExecutor
from langchain_community.agent_toolkits import create_sql_agent, SQLDatabaseToolkit
from langchain_community.utilities import SQLDatabase
from langchain.tools import Tool
from langchain.memory import ConversationBufferMemory
from pymongo import MongoClient
import threading
import os, uuid
import re
import traceback
import ast
from bson import json_util
from dotenv import load_dotenv
from werkzeug.utils import secure_filename
from werkzeug.exceptions import HTTPException
from langchain.prompts import ChatPromptTemplate
from tabulate import tabulate
from fuzzywuzzy import fuzz
# from langchain_groq import ChatGroq
from datetime import datetime

def error_safe(f):
    def wrapper(*args, **kwargs):
        try:
            return f(*args, **kwargs)
        except HTTPException as he:
            return jsonify({"status": "error", "message": he.description}), he.code
        except Exception as e:
            print("[ERROR] Uncaught Exception in", f.__name__)
            traceback.print_exc()
            return jsonify({"status": "error", "message": str(e)}), 500
    wrapper.__name__ = f.__name__
    return wrapper


# --- ENV + FLASK SETUP ---
load_dotenv()
os.environ["GEMINI_API_KEY"] = os.getenv("GEMINI_API_KEY")

app = Flask(__name__)
app.config['SECRET_KEY'] = os.urandom(32)
app.config['UPLOAD_FOLDER'] = 'uploads'
socketio = SocketIO(app, cors_allowed_origins="*")
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

llm = ChatGoogleGenerativeAI(
    temperature=0.2,
    model="gemini-2.0-flash",
    max_retries=50,
    api_key=os.getenv("GEMINI_API_KEY")
)
# llm = ChatGroq(temperature=0.2, model_name="mistral-saba-24b",api_key=os.getenv("GROQ_API_KEY"))

# --- GLOBALS ---
agent_executor = None
memory = ConversationBufferMemory(
    memory_key="chat_history", return_messages=True, input_key="input")
mongo_db = None
client = None
db_mode = None  # "mongo" or "sql"

# --- SHARED ---
def is_schema_request(prompt: str) -> bool:
    pattern = re.compile(
        r'\b(schema|table names|tables|columns|structure|column names|collections?|field names|metadata|describe|show)\b', re.IGNORECASE)
    return bool(pattern.search(prompt))


def is_sensitive_request(prompt: str) -> bool:
    sensitive_keywords = [
        "password", "token", "credential", "secret", "api key", "schema", "structure",
        "collection name", "field name", "user_id", "order_id", "payment_id",
        "internal", "database structure", "table structure", "email", "phone", "contact", "ssn"
    ]
    lowered = prompt.lower()
    return any(keyword in lowered for keyword in sensitive_keywords)


intent_prompt = ChatPromptTemplate.from_messages([
    ("system", "Classify if the user is asking schema/structure/sensitive info (tables, columns, schema): YES or NO."),
    ("human", "{prompt}")
])
intent_checker = intent_prompt | llm


def is_schema_leak_request(prompt):
    try:
        classification = intent_checker.invoke({"prompt": prompt})
        return "yes" in classification.content.strip().lower()
    except:
        return False

# --- INIT SQL AGENT ---
def init_sql_agent(db_path):
    global agent_executor, db_mode
    db = SQLDatabase.from_uri(f"sqlite:///{db_path}")
    toolkit = SQLDatabaseToolkit(db=db, llm=llm)
    prefix = '''You are a helpful SQL expert agent that ALWAYS returns natural language answers using the tools.'''
    # Always format your responses in Markdown. For example:
    # - Use bullet points
    # - Use bold for headers
    # - Wrap code in triple backticks
    # - Tables should use Markdown table syntax

    # You must NEVER:
    # - Show or mention SQL syntax.
    # - Reveal table names, column names, or database schema.
    # - Respond with any technical details or structure of the database.
    # - Return code or tool names.
    # - Give wrong Answers.

    # You must ALWAYS:
    # - Respond in plain, friendly language.
    # - Don't Summarize the result for the user (e.g., "There are 9 tables in the system.")
    # - If asked to list table names or schema, politely refuse and respond with:
    #     "I'm sorry, I can't share database structure information."
    # - ALWAYS HAVE TO SOLVE COMPLEX USER QUERIES. FOR THAT, UNDERSTAND THE PROMPT, ANALYSE PROPER AND THEN GIVE ANSWER.
    # - Your Answers should be correct, you have to do understand process well and give accurate answers.
    # - IF USER ASK ABOUT DATA, Which is not there in a database, then GIVE FOLLOWING ANSWER:
    #     "There is no such data in the Database."

    # Strict Rules You MUST Follow:
    # - NEVER display or mention SQL queries.
    # - NEVER explain SQL syntax or logic.
    # - NEVER return technical or code-like responses.
    # - ONLY respond in natural, human-friendly language.
    # - You are not allow to give the name of any COLUMNS, TABLES, DATABASE, ENTITY, SYNTAX, STRUCTURE, DESIGN, ETC...

    # If the user asks for anything other than retrieving data (SELECT), respond using this exact message:
    #     "I'm not allowed to perform operations other than SELECT queries. Please ask something that involves reading data."

    # Do not return SQL queries or raw technical responses to the user.

    # For example:
    # Wrong: SELECT * FROM ...
    # Correct: The user assigned to the cart is Alice Smith.

    # Use the tools provided to get the correct data from the database and summarize the response clearly.
    # If the input is unclear or lacks sufficient data, ask for clarification using the SubmitFinalAnswer tool.
    # Never return SQL queries as your response.

    # If you cannot find an answer,
    # Double-check your query and running it again.
    # - If a query fails, revise and try again.
    # - Else 'No data found' using SubmitFinalAnswer.No SQL, no code. '''
    agent_executor = create_sql_agent(
        llm=llm,
        toolkit=toolkit,
        verbose=True,
        prefix=prefix,
        agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        memory=memory,
        agent_executor_kwargs={"handle_parsing_errors": True},
    )
    db_mode = "sql"

# --- INIT MONGO AGENT ---
system_message = """
        You are **MongoDBQueryBot**, a highly intelligent and accurate assistant for answering questions about data stored in a MongoDB database using tools.
        """

# ### 🚨 Critical Instructions (Strictly Follow These):
# - You **must always** use tools provided to answer user questions.
# - Always join IDs with associated human-readable values like names or titles when answering.
# - Prefer displaying `user name`, `employee name`, or `product name` instead of internal IDs like `user_id`, `emp_id`, or `product_id`.
# - Avoid responding only with technical identifiers. Make responses meaningful to users.
# - **Never** guess or fabricate any information.
# - **Do not** show raw JSON, field names, or database structure.
# - Your role is **read-only**: do not suggest or perform insert/update/delete.
# - After Using All the available tools, if you are Unable to find any documents, then give followig ANSWER:
#     "Please, rephrase your query because I can't exactly understand, what you want !"
# - If a query can't be answered or is unrelated to reading data, reply:
# ❌ "I'm only allowed to retrieve data. Please ask a query involving reading information."
# - IF USER ASK ABOUT DATA, Which is not there in a database, then GIVE FOLLOWING ANSWER:
#             "There is no such data in the Database."
# - When returning answers:
#     - Do **not return internal IDs** like `user_id`, `order_id`, `payment_id`, etc.
#     - Instead, use human-readable fields like `name`, `full_name`, `user_name`, etc., from related collections.
#     - If only an ID is available, try joining the relevant collections to fetch the proper display name.

# ### 🧠 How to Think:
# - Understand **exactly** what the user is trying to ask. Do not answer if unclear — ask for clarification.
# - Translate the user prompt into tool inputs by identifying:
# - Which collection to search
# - What value or field they're referring to
# - The correct format expected by the tool

# ### 🛠️ Tool Usage Guide:
# - Use `FindDocuments` for queries like:
# - "Show me all employees named John"
# - "What is the salary of Manager X?"
# - Use `ListCollections` to discover available data types (but don’t share them directly).
# - **IMPORTANT : Don't Iterate only in one tool, if you can't able to answer using current tool you using, then swith the tool !**
# - Use `JoinCollections` to resolve IDs into names when the question asks about people, customers, or products.
# - When resolving names from payments, use this format:
#   `from=Payments, key=order_id, to=Orders, match=order_id, next_key=user_id, next_to=Users, next_match=user_id, return=name`

# - Your goal is to **return the person's name** (e.g., `name`, `user_name`, `full_name`) not their ID.
# - Always prioritize returning names instead of internal identifiers.
# - Examples:
#     - For payment-related questions → Join Payments → Orders → Users and return name
#     - For order questions → Join Orders → Users and return user names

# ### 🧾 Response Format:
# - Use **clear markdown with tables** when displaying data.
# - If no data is found: return `**No documents found.**`
# - Stay professional, brief, and relevant.

# ### 🚫 Never Do This:
# - Do not leak MongoDB structure, schema, or field names.
# - Do not suggest code, MongoDB syntax, or field mappings.
# - Do not hallucinate or make assumptions.

# Start by analyzing the prompt carefully, select the right tool, invoke it, and return a user-friendly answer based on the result.
# """


def find_docs_tool_func(query: str) -> str:
    """
    Flexible MongoDB search with fallback:
    - First tries in specified collection.
    - If no results found, falls back to search across all collections.
    Input format:
    - collection=<collection>, key=<field>, value=<value>
    - OR: collection=<collection>, value=<value>
    """
    try:
        parts = dict(part.strip().split("=", 1)
                     for part in query.split(",") if "=" in part)
        collection = parts.get("collection")
        key = parts.get("key")
        value = parts.get("value")
        if not collection:
            return "❌ 'collection' is required."

        def query_collection(coll_name):
            if key and value:
                return list(mongo_db[coll_name].find({key: value}, {'_id': 0}))
            elif value:
                return [doc for doc in mongo_db[coll_name].find({}, {'_id': 0}) if any(str(v).lower() == value.lower() for v in doc.values())]
            else:
                return list(mongo_db[coll_name].find({}, {'_id': 0}))

        docs = query_collection(collection)
        if docs:
            return "\n markdown\n" + tabulate(docs, headers="keys", tablefmt="github") + "\n"

        for coll in mongo_db.list_collection_names():
            if coll == collection:
                continue
            docs = query_collection(coll)
            if docs:
                return "\n markdown\n" + tabulate(docs, headers="keys", tablefmt="github") + "\n"

        return "**No documents found.**"
    except Exception as e:
        return f"Invalid input format or error: {str(e)}"


def aggregate_group_by(_input: str):
    try:
        if _input.strip().startswith("{"):
            # Parse JSON-like string
            args = ast.literal_eval(_input)
            collection = args.get("collection_name") or args.get("collection")
            field = args.get("group_by") or args.get("field")
        else:
            # Handle legacy input format
            args = dict(x.split("=") for x in _input.split(","))
            collection = args["collection"]
            field = args["field"]

        pipeline = [
            {"$group": {"_id": f"${field}", "count": {"$sum": 1}}},
            {"$project": {"_id": 0, field: "$_id", "count": 1}}
        ]
        result = list(mongo_db[collection].aggregate(pipeline))
        if not result:
            return "**No data found.**"
        return "\n markdown\n" + tabulate(result, headers="keys", tablefmt="github") + "\n"
    except Exception as e:
        return f"Aggregation failed: {e}"


def get_all_documents(collection: str):
    try:
        docs = list(mongo_db[collection].find({}, {'_id': 0}))
        if not docs:
            return "**No documents found.**"
        return "\n markdown\n" + tabulate(docs, headers="keys", tablefmt="github") + "\n"
    except Exception as e:
        return f"Error fetching documents: {e}"


def fuzzy_find_documents(query: str):
    try:
        parts = dict(part.strip().split("=", 1) for part in query.split(","))
        collection = parts["collection"]
        value = parts["value"]
        threshold = int(parts.get("threshold", 80))

        matches = []
        for doc in mongo_db[collection].find({}, {'_id': 0}):
            if any(fuzz.partial_ratio(str(v).lower(), value.lower()) >= threshold for v in doc.values()):
                matches.append(doc)
        if not matches:
            return "**No fuzzy matches found.**"
        return "\n markdown\n" + tabulate(matches, headers="keys", tablefmt="github") + "\n"
    except Exception as e:
        return f"Fuzzy match error: {e}"

# def join_collections_tool_func(_input: str):
#     try:
#         # Parse input like: from=Products, key=category_id, to=Categories, match=category_id, return=category_name
#         args = dict(x.strip().split("=", 1) for x in _input.split(","))
#         from_collection = args["from"]
#         foreign_key = args["key"]
#         to_collection = args["to"]
#         match_key = args["match"]
#         return_field = args["return"]

#         results = []
#         foreign_lookup = {
#             doc[match_key]: doc.get(return_field)
#             for doc in mongo_db[to_collection].find()
#             if match_key in doc
#         }

#         for doc in mongo_db[from_collection].find({}, {'_id': 0}):
#             doc[return_field] = foreign_lookup.get(doc.get(foreign_key), "Unknown")
#             results.append(doc)

#         if not results:
#             return "**No documents found.**"

#         return "\n markdown\n" + tabulate(results, headers="keys", tablefmt="github") + "\n"

#     except Exception as e:
#         return f"Join failed: {e}"


def join_collections_tool_func(_input: str):
    """
    Supports 2-level join (Payments → Orders → Users) or any pair-wise join
    Input formats:
    - from=Payments, key=order_id, to=Orders, match=order_id, next_key=user_id, next_to=Users, next_match=user_id, return=name
    - from=Products, key=category_id, to=Categories, match=category_id, return=category_name
    """
    try:
        args = dict(x.strip().split("=", 1) for x in _input.split(","))
        from_coll = args["from"]
        key = args["key"]
        to_coll = args["to"]
        match = args["match"]
        return_field = args["return"]

        next_key = args.get("next_key")
        next_to = args.get("next_to")
        next_match = args.get("next_match")

        # First join (e.g., Payments → Orders)
        to_docs = {doc[match]: doc for doc in mongo_db[to_coll].find()
                   if match in doc}
        joined = []
        for doc in mongo_db[from_coll].find({}, {'_id': 0}):
            foreign_doc = to_docs.get(doc.get(key))
            if not foreign_doc:
                continue
            merged = {**doc, **foreign_doc}
            joined.append(merged)

        # Second join (e.g., Orders → Users)
        if next_key and next_to and next_match:
            next_docs = {
                doc[next_match]: doc for doc in mongo_db[next_to].find() if next_match in doc}
            for doc in joined:
                user_doc = next_docs.get(doc.get(next_key))
                if user_doc:
                    doc[return_field] = user_doc.get(return_field, "Unknown")
                else:
                    doc[return_field] = "Unknown"

        # Prepare final result
        if not joined:
            return "**No documents found.**"
        final = [{return_field: doc.get(return_field)}
                 for doc in joined if return_field in doc]
        return "\n markdown\n" + tabulate(final, headers="keys", tablefmt="github") + "\n"

    except Exception as e:
        return f"Join failed: {e}"


def smart_join_router(prompt: str) -> str:
    """
    An intelligent router that suggests the correct JoinCollections input string
    for common user intent like payments → orders → users → name.
    """
    prompt_lower = prompt.lower()
    if "payment" in prompt_lower and any(term in prompt_lower for term in ["who", "name", "user", "person"]):
        return "from=Payments, key=order_id, to=Orders, match=order_id, next_key=user_id, next_to=Users, next_match=user_id, return=name"
    elif "order" in prompt_lower and "name" in prompt_lower:
        return "from=Orders, key=user_id, to=Users, match=user_id, return=name"
    # Extend as needed
    return "Unable to auto-generate join path. Please provide more context."


def init_mongo_agent(json_path):
    global agent_executor, client, mongo_db, db_mode
    # client = MongoClient("mongodb://localhost:27017/")
    client = MongoClient("mongodb+srv://dixitmwa:DixitWa%40123!@cluster0.qiysaz9.mongodb.net/")
    mongo_db = client['uploaded_mongo']
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json_util.loads(f.read())

    # Handle both single-collection and multi-collection formats
    if isinstance(data, list):
        # Default collection name if only a list is provided
        collection = mongo_db['default_collection']
        collection.drop()
        collection.insert_many(data)
    elif isinstance(data, dict):
        for col_name, docs in data.items():
            collection = mongo_db[col_name]
            collection.drop()
            if isinstance(docs, list):
                collection.insert_many(docs)
            else:
                collection.insert_one(docs)
    else:
        raise ValueError("Unsupported JSON format. Must be a list or dict.")

    def list_collections(_input=None):
        return mongo_db.list_collection_names()

    find_docs_tool = Tool(
        name="FindDocuments",
        description=(
            "Use this tool to find documents in a MongoDB collection.\n"
            "Input format:\n"
            "- `collection=<collection>, key=<field>, value=<value>` for precise queries\n"
            "- OR `collection=<collection>, value=<value>` to search across all fields\n"
            "If `key` is omitted, the tool will automatically scan all fields to find matching values.\n"
            "Examples:\n"
            "- `collection=default_collection, key=name, value=Lauren Alexander`\n"
            "- `collection=default_collection, value=Lauren Alexander`"
        ),
        func=find_docs_tool_func)

    aggregate_tool = Tool(
        name="AggregateGroupBy",
        func=aggregate_group_by,
        description=(
            "Group documents and count by any field. Format: collection=<name>, field=<group_by_field>. E.g., collection=residents, field=gender"
        )
    )
    get_all_documents_tool = Tool(
        name="GetAllDocuments",
        func=get_all_documents,
        description=(
            "Fetch all documents from a collection. Input: collection name only. Example: residents"
        )
    )

    fuzzy_tool = Tool(
        name="FuzzyFindDocuments",
        func=fuzzy_find_documents,
        description=("Fuzzy match documents across all fields in a collection. Format: collection=<name>, value=<search_term>, threshold=80 (optional)"
                     )
    )

    join_collection_tool = Tool(
        name="JoinCollections",
        func=join_collections_tool_func,
        description=(
            "Join collections to map foreign keys to human-readable values. Supports 1 or 2-level joins.\n"
            "Formats:\n"
            "- from=Payments, key=order_id, to=Orders, match=order_id, return=status\n"
            "- from=Payments, key=order_id, to=Orders, match=order_id, next_key=user_id, next_to=Users, next_match=user_id, return=name"
        )
    )
    smart_router_tool = Tool(
        name="SmartJoinRouter",
        func=smart_join_router,
        description=(
            "Suggest the correct JoinCollections input format based on user intent.\n"
            "Use this when you are unsure how to form the join input."
        )
    )

    tools = [
        Tool(name="FindDocuments", func=find_docs_tool,
             description="Flexible MongoDB search..."),
        Tool(name="ListCollections", func=lambda x: list_collections(),
             description="List all collections..."),
        Tool(name="AggregateGroupBy", func=aggregate_tool,
             description="Group and count by any field..."),
        Tool(name="GetAllDocuments", func=get_all_documents_tool,
             description="Fetch all documents from a collection..."),
        Tool(name="FuzzyFindDocuments", func=fuzzy_tool,
             description="Fuzzy match documents across all fields..."),
        Tool(name="JoinCollections", func=join_collection_tool,
             description="Join related collections to return names instead of IDs..."),
        Tool(name="SmartJoinCollections", func=smart_router_tool,
             description="Smrt Join related collections to return names instead of IDs...")
    ]

    agent_executor = initialize_agent(
        tools=tools,
        llm=llm,
        agent_type=AgentType.CONVERSATIONAL_REACT_DESCRIPTION,
        memory=memory,
        verbose=True,
        prefix=system_message,
        handle_parsing_errors=True
    )
    db_mode = "mongo"


@app.errorhandler(Exception)
def handle_all_errors(e):
    print(f"[ERROR] Global handler caught an exception: {str(e)}")
    traceback.print_exc()

    if isinstance(e, HTTPException):
        return jsonify({"status": "error", "message": e.description}), e.code

    return jsonify({"status": "error", "message": "An unexpected error occurred"}), 500


@app.errorhandler(TooManyRequests)
def handle_429_error(e):
    return jsonify({
        "status": "error",
        "message": "🚦 Agent is busy, try again after sometime."
    }), 429

# --- ROUTES ---


@app.route("/")
def index():
    return render_template("app_index.html")


@app.route("/upload_db", methods=["POST"])
@error_safe
def upload_db():
    file = request.files.get("file")
    if not file or file.filename == "":
        return jsonify(success=False, message="No file provided"), 400

    filename = secure_filename(file.filename)
    path = os.path.join(app.config["UPLOAD_FOLDER"], filename)
    file.save(path)

    try:
        if filename.endswith(".json"):
            init_mongo_agent(path)
            mongo_db = globals().get("mongo_db")
            db_name = getattr(mongo_db, "name", None) or os.path.splitext(filename)[0]
            return jsonify({"database_name": db_name, "message": "MongoDB initialized"}), 200
            # return jsonify(success=True, message="MongoDB initialized")
        # elif filename.endswith(".db"):
        #     init_sql_agent(path)
        #     return jsonify(success=True, message="SQL DB initialized")
        # SQL DB (.db or .sqlite)
        elif filename.lower().endswith(".db") or filename.lower().endswith(".sqlite"):
            init_sql_agent(path)  # your existing initializer
            db_name = os.path.splitext(filename)[0]
            return jsonify({"database_name": db_name, "message": "SQL DB initialized"}), 200
        else:
            return jsonify(success=False, message="Unsupported file format"), 400
    except Exception as e:
        traceback.print_exc()
        return jsonify(success=False, message=f"Init failed: {e}"), 500

@app.route("/generate", methods=["POST"])
@error_safe
def generate():
    try:
        data = request.get_json(force=True) or {}
        prompt = data.get("prompt", "").strip()
        if not prompt:
            return jsonify({"status": "error", "message": "Prompt is required"}), 400
    except Exception:
        traceback.print_exc()
        return jsonify({"status": "error", "message": "Invalid input"}), 400

    try:
        # invoke your agent synchronously
        result = agent_executor.invoke({"input": prompt})

        # Normalize final_answer from agent output safely
        if isinstance(result, dict):
            final_answer = (
                result.get("final_answer")
                or result.get("output")
                or result.get("answer")
                or result.get("text")
                or ""
            )
        else:
            final_answer = str(result or "")

        if final_answer is None:
            final_answer = ""

        # Optionally keep emitting to socket so clients listening to socketio still get it
        try:
            socketio.emit("final", {"message": final_answer})
        except Exception:
            app.logger.debug("socket emit failed, continuing")

        return jsonify({"final_answer": final_answer, "prompt": prompt}), 200

    except Exception as e:
        app.logger.exception("Agent invocation failed")
        return jsonify({"prompt": prompt, "final_answer": "", "message": f"Agent error: {str(e)[:200]}"}), 500


if __name__ == "__main__":
    # socketio.run(app, debug=True)
    socketio.run(app, host="0.0.0.0", port=7860, allow_unsafe_werkzeug=True)