"""
Gradio demo that exposes your agentic QA pipeline (uses smolagents CodeAgent + a BM25 retriever).
Intended for deployment to Hugging Face Spaces. Set HF_TOKEN in Space secrets or environment.
"""

import os
import traceback
import gradio as gr

# Basic ML / NLP libs used by your pipeline
import datasets
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.retrievers import BM25Retriever

# smolagents agent pieces
from smolagents import Tool, InferenceClientModel, CodeAgent

# -------------------------
# Document preparation
# -------------------------
def prepare_knowledge_base(cache_dir="/tmp/hf_kb_cache"):
    """
    Download and prepare the HF docs dataset, filter to transformers docs,
    chunk into smaller documents and return the processed doc list.
    This function caches results across runs (simple file-check).
    """
    import os
    import pickle

    cache_path = os.path.join(cache_dir, "docs_processed.pkl")
    os.makedirs(cache_dir, exist_ok=True)

    # If cached, load and return
    if os.path.exists(cache_path):
        try:
            with open(cache_path, "rb") as f:
                docs_processed = pickle.load(f)
            return docs_processed
        except Exception:
            # fall through to re-create cache
            pass

    knowledge_base = datasets.load_dataset("m-ric/huggingface_doc", split="train")
    # Keep only transformers docs (same filter as your original snippet)
    knowledge_base = knowledge_base.filter(lambda row: row["source"].startswith("huggingface/transformers"))
    source_docs = [
        Document(page_content=doc["text"], metadata={"source": doc["source"].split("/")[1]})
        for doc in knowledge_base
    ]

    # Split into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
        add_start_index=True,
        strip_whitespace=True,
        separators=["\n\n", "\n", ".", " ", ""],
    )
    docs_processed = text_splitter.split_documents(source_docs)

    import pickle
    with open(cache_path, "wb") as f:
        pickle.dump(docs_processed, f)

    return docs_processed

# -------------------------
# Retriever tool for agent
# -------------------------
class RetrieverTool(Tool):
    name = "retriever"
    description = "Uses BM25 retrieval over transformers docs to fetch context relevant to a question."
    inputs = {
        "query": {
            "type": "string",
            "description": "A short query describing the information to retrieve (affirmative form).",
        }
    }
    output_type = "string"

    def __init__(self, docs, k=8, **kwargs):
        super().__init__(**kwargs)
        # Build a BM25 retriever from the processed docs
        self.retriever = BM25Retriever.from_documents(docs, k=k)

    def forward(self, query: str) -> str:
        assert isinstance(query, str), "query must be a string"
        docs = self.retriever.invoke(query)
        formatted = "\nRetrieved documents:\n" + "".join(
            [
                f"\n\n===== Document {i} =====\n{doc.page_content}"
                for i, doc in enumerate(docs)
            ]
        )
        return formatted

# -------------------------
# Agent initialization
# -------------------------
# Prepare docs
DOCS = prepare_knowledge_base()

# Initialize tool instance
retriever_tool = RetrieverTool(DOCS)

# NOTE: On HF Spaces you can set environment variable HF_TOKEN in the UI (Settings -> Secrets).
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN is None:
    print("Warning: HF_TOKEN not set. If your chosen model requires authentication, set HF_TOKEN in environment/secrets.")


model = InferenceClientModel()  # default model; you can set model_id arg if needed
agent = CodeAgent(
    tools=[retriever_tool],
    model=model,
    max_steps=4,
    verbosity_level=1,
)


# -------------------------
# Gradio interface
# -------------------------
def run_agent(question: str):
    """Run the agent and return the final answer (or a helpful error)."""
    if not question or question.strip() == "":
        return "Please enter a question."

    # If agent couldn't be created, return fallback info
    if agent is None:
        return "Agent not initialized in this environment. Check logs in the Space and ensure `smolagents` is installed and HF_TOKEN is configured."

    result = agent.run(question)
    return result


with gr.Blocks(title="Agentic RAG Demo") as demo:
    gr.Markdown(
        """
        # Transformers docs QA (Agent demo)
        Ask the agent a question about Hugging Face Transformers docs.
        Example: *For a transformers model training, which is slower, the forward or the backward pass?*
        """
    )
    with gr.Row():
        inp = gr.Textbox(placeholder="Write your question here...", label="Question", lines=2)
        out = gr.Textbox(label="Agent answer", lines=10)
    with gr.Row():
        run_btn = gr.Button("Ask")
        clear_btn = gr.Button("Clear")
    run_btn.click(fn=run_agent, inputs=inp, outputs=out)
    clear_btn.click(lambda: "", None, inp)

demo.launch()