# -*- coding: utf-8 -*-
"""app.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1DSQjlXwb4UHeF4RILlBwwHfN5jRyyvov
"""

import gradio as gr
import pandas as pd
import os
from tempfile import NamedTemporaryFile
from uuid import uuid4

# LangChain & Groq imports for embedding and LLM access
from langchain_community.document_loaders import CSVLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_groq import ChatGroq

# ========== API Key Setup ==========
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY or not GROQ_API_KEY.strip():
    raise ValueError("❌ Please provide GROQ_API_KEY as a Space secret (Settings > Secrets).")

# ========== Global State ==========
vectorstore = None  # Chroma index for the currently loaded CSV (None until a CSV is indexed)
df = None           # pandas DataFrame of the currently loaded CSV

_embeddings = None  # lazily created embedding model, reused across uploads


def _get_embeddings():
    """Create the sentence-transformer embedding model once and reuse it.

    Instantiating HuggingFaceEmbeddings loads the model weights, which is
    slow; caching it avoids redoing that work on every CSV upload.
    """
    global _embeddings
    if _embeddings is None:
        _embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={'device': 'cpu'},
        )
    return _embeddings


def safe_delete_collection(vs):
    """Deletes Chroma collection if exists (prevents stale retrieval)."""
    try:
        if vs is not None and hasattr(vs, "delete_collection"):
            vs.delete_collection()
    except Exception:
        # Deliberate best-effort cleanup: a failure to drop the old
        # collection must never block indexing the new upload.
        pass


def load_and_index_csv(file):
    """Load the CSV, build vectorstore with embeddings, and provide preview/status.

    Args:
        file: Gradio file object (exposes a ``.name`` path) or None.

    Returns:
        Tuple ``(preview_html, status_message, chat_enabled)`` where
        ``chat_enabled`` is True on success, False on failure, and None
        when no file was supplied.
    """
    global vectorstore, df

    if file is None:
        return None, "⚠️ Please upload a CSV.", None

    tmp_path = None
    try:
        # Remove the prior index/collection and state so a previous CSV
        # can never leak into retrieval for the new one.
        safe_delete_collection(vectorstore)
        vectorstore = None
        df = None

        # Round-trip the user's CSV through pandas and a temp file so
        # CSVLoader indexes exactly what pandas parsed.
        df = pd.read_csv(file.name)
        csv_text = df.to_csv(index=False)
        with NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
            tmp.write(csv_text.encode("utf-8"))
            tmp_path = tmp.name

        # Create new index with a unique collection name for this upload.
        loader = CSVLoader(file_path=tmp_path)
        docs = loader.load()
        collection_name = f"csvpal-{uuid4().hex[:8]}"
        vectorstore = Chroma.from_documents(
            documents=docs,
            embedding=_get_embeddings(),
            collection_name=collection_name,
        )

        preview_html = df.head().to_html(index=False)
        status_msg = f"✅ CSV Loaded! {len(df)} rows, {len(df.columns)} columns."
        return preview_html, status_msg, True
    except Exception as e:
        return None, f"Error loading CSV: {e}", False
    finally:
        # Bug fix: the original used delete=False with no cleanup, leaking
        # one temp file per upload. Remove it once indexing finished.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass


def answer_question(user_input):
    """Retrieve top-10 relevant rows and ask the LLM to answer strictly using them.

    Args:
        user_input: The user's natural-language question.

    Returns:
        The model's answer, or a user-facing warning/error string.
    """
    if vectorstore is None or df is None:
        return "⚠️ Please upload and load a CSV first."
    if not user_input or not user_input.strip():
        return "⚠️ Please enter a question."

    # Retrieval and strict grounding: only the top-k rows reach the LLM.
    k = 10
    try:
        docs_and_scores = vectorstore.similarity_search_with_score(user_input, k=k)
    except Exception as e:
        return f"❌ Retrieval error: {e}"

    if not docs_and_scores:
        return "⚠️ No relevant data found for your question in the CSV."

    try:
        # Documents normally expose .page_content; fall back defensively.
        context = "\n".join(
            getattr(doc, "page_content", getattr(doc, "content", ""))
            for doc, _ in docs_and_scores
        )
    except Exception:
        context = ""

    prompt = f"""You are an expert data analyst AI assistant.
Use ONLY the following CSV data to answer the question below.
If the answer cannot be found in the data, say 'No sufficient data to answer.'
Do NOT guess or invent information.

Data:
{context}

Question: {user_input}

Please answer clearly and provide Python pandas code using DataFrame `df` to reproduce the answer exactly.
"""

    try:
        chat_model = ChatGroq(model_name="llama-3.3-70b-versatile", api_key=GROQ_API_KEY)
        response = chat_model.invoke(prompt)
        # Normalize across possible return shapes (plain str or message object).
        if isinstance(response, str):
            answer = response
        elif hasattr(response, "content"):
            answer = response.content
        else:
            answer = str(response)
    except Exception as e:
        answer = f"❌ Error generating answer: {e}"
    return answer


# ========== Gradio UI ==========
with gr.Blocks(title="CsvPal-AI: Chat with Your CSV (RAG-powered Q&A)") as demo:
    gr.Markdown(
        """
        # 📊 CsvPal-AI
        Upload a CSV file and ask natural language questions.
        Receive clear, data-grounded answers and Python pandas code snippets.
        """
    )
    with gr.Row():
        csv_file = gr.File(label="📂 Upload CSV", file_types=[".csv"])
        status = gr.Textbox(label="Status", interactive=False, max_lines=3)
    df_preview = gr.HTML(label="🧾 CSV Preview")
    gr.Markdown(
        "⚠️ Note: Answers are based on the top 10 most relevant rows retrieved from your CSV for each question."
    )
    gr.Markdown("----")
    user_input = gr.Textbox(
        label="📝 Ask your question:",
        placeholder="e.g. What analysis can be done in the CSV data?",
        interactive=False,
        lines=1
    )
    with gr.Row():
        ask_btn = gr.Button("🤖 Get Answer", interactive=False)
        clear_btn = gr.Button("Clear")
    answer_output = gr.Textbox(
        label="Answer",
        lines=15,
        interactive=False
    )

    # UI clear on manual file clear: reset widgets and disable chat.
    def clear_all_from_file():
        return "", "", gr.update(value="", interactive=False), gr.update(interactive=False), ""

    # On file upload, index fresh and enable chat controls on success.
    def on_file_upload(file):
        preview, msg, enable_chat = load_and_index_csv(file)
        return (
            preview or "",
            msg,
            gr.update(value="", interactive=bool(enable_chat)),
            gr.update(interactive=bool(enable_chat)),
            ""  # answer_output
        )

    # Global clear: wipes index, df, and UI.
    def clear_everything():
        global vectorstore, df
        safe_delete_collection(vectorstore)
        vectorstore = None
        df = None
        return (
            None,                                    # csv_file (clears)
            "",                                      # df_preview
            "",                                      # status
            gr.update(value="", interactive=False),  # user_input clear+disable
            gr.update(interactive=False),            # ask_btn disable
            ""                                       # answer_output clear
        )

    csv_file.upload(
        on_file_upload,
        inputs=[csv_file],
        outputs=[df_preview, status, user_input, ask_btn, answer_output]
    )
    csv_file.clear(
        clear_all_from_file,
        None,
        [df_preview, status, user_input, ask_btn, answer_output]
    )
    ask_btn.click(answer_question, inputs=[user_input], outputs=[answer_output])
    user_input.submit(answer_question, inputs=[user_input], outputs=[answer_output])
    clear_btn.click(
        clear_everything,
        inputs=[],
        outputs=[csv_file, df_preview, status, user_input, ask_btn, answer_output]
    )

demo.launch()