import os
import torch
from pathlib import Path
from dotenv import load_dotenv

# Runs before every os.getenv() lookup in this module, so values supplied via
# a local .env file are visible to them.
# NOTE(review): assumes python-dotenv's default of not overriding variables
# that are already set in the environment — confirm.
load_dotenv()

# Authenticate with HuggingFace Hub so gated models (e.g. EmbeddingGemma-300M) can be downloaded.
# On HF Spaces, set HF_TOKEN in Settings → Variables and secrets.
# An unset or empty HF_TOKEN skips the login entirely.
_HF_TOKEN = os.getenv("HF_TOKEN", "")
if _HF_TOKEN:
    # Imported here rather than at the top of the file, so huggingface_hub is
    # only imported when a token is actually configured.
    from huggingface_hub import login as _hf_login
    # add_to_git_credential=False: per the huggingface_hub docs the token is
    # not additionally stored in the git credential helper.
    _hf_login(token=_HF_TOKEN, add_to_git_credential=False)

# Filesystem layout, anchored at the directory that contains this file.
PROJECT_ROOT: Path = Path(__file__).parent
DATA_DIR: Path = PROJECT_ROOT.joinpath("data", "texts")
VECTORSTORE_DIR: Path = PROJECT_ROOT.joinpath("vectorstore")

# Provider API keys read from the environment; each is "" when its variable
# is not set.
GOOGLE_API_KEY: str = os.environ.get("GOOGLE_API_KEY", "")
GROQ_API_KEY: str = os.environ.get("GROQ_API_KEY", "")
OPENROUTER_API_KEY: str = os.environ.get("OPENROUTER_API_KEY", "")

# Torch device string: "cuda" when torch reports CUDA as available, else "cpu".
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# ---------------------------------------------------------------------------
# LLM catalogue
#   LLM_OPTIONS maps a human-readable label to (provider, model_id).
#   Providers: "google" | "groq" | "openrouter"
# ---------------------------------------------------------------------------
# Source table, grouped by provider: provider -> {label: model_id}.
_LLM_MODELS_BY_PROVIDER: dict[str, dict[str, str]] = {
    # Google AI Studio (free tier).
    # Limits verified from aistudio.google.com/rate-limit (2026-05).
    "google": {
        "Gemma 4 MoE 26B  [Google]": "gemma-4-26b-a4b-it",             # 15 RPM | ∞ TPM | 1500 RPD
        "Gemma 4 Dense 31B  [Google]": "gemma-4-31b-it",               # 15 RPM | ∞ TPM | 1500 RPD
        "Gemini 3.1 Flash Lite  [Google]": "gemini-3.1-flash-lite",    # 15 RPM | 250K TPM | 500 RPD
        "Gemini 3.5 Flash  [Google]": "gemini-3.5-flash",              #  5 RPM | 250K TPM |  20 RPD
        "Gemini 2.5 Flash  [Google]": "gemini-2.5-flash",              #  5 RPM | 250K TPM |  20 RPD
        "Gemini 2.5 Flash Lite  [Google]": "gemini-2.5-flash-lite",    # 10 RPM | 250K TPM |  20 RPD
    },
    # Groq (free tier, very fast LPU inference).
    "groq": {
        "Llama 3.3 70B  [Groq]": "llama-3.3-70b-versatile",
        "Llama 4 Scout 17B  [Groq]": "meta-llama/llama-4-scout-17b-16e-instruct",
        "Qwen3 32B  [Groq]": "qwen/qwen3-32b",
        "Llama 3.1 8B  [Groq]": "llama-3.1-8b-instant",
    },
    # OpenRouter free models (":free" suffix = no cost, rate-limited).
    "openrouter": {
        "Nvidia Nemotron 120B  [OpenRouter]": "nvidia/nemotron-3-super-120b-a12b:free",
        "OpenAI OSS 120B  [OpenRouter]": "openai/gpt-oss-120b:free",
        "DeepSeek V4 Flash  [OpenRouter]": "deepseek/deepseek-v4-flash:free",
        "Llama 3.3 70B  [OpenRouter]": "meta-llama/llama-3.3-70b-instruct:free",
        "Qwen3 Next 80B  [OpenRouter]": "qwen/qwen3-next-80b-a3b-instruct:free",
        "Gemma 4 MoE 26B  [OpenRouter]": "google/gemma-4-26b-a4b-it:free",
    },
}

# Flattened view: label -> (provider, model_id). Insertion order follows the
# table above (Google, then Groq, then OpenRouter).
LLM_OPTIONS: dict[str, tuple[str, str]] = {
    label: (provider, model_id)
    for provider, models in _LLM_MODELS_BY_PROVIDER.items()
    for label, model_id in models.items()
}

# Label of the default model; it is one of the LLM_OPTIONS keys.
DEFAULT_LLM = "Gemma 4 MoE 26B  [Google]"

# Per provider: (name of the environment variable holding its API key, host
# name associated with the provider).
# NOTE(review): the host is presumably where a key can be obtained — confirm
# against the code that displays it.
PROVIDER_KEYS: dict[str, tuple[str, str]] = dict(
    google=("GOOGLE_API_KEY", "ai.google.dev"),
    groq=("GROQ_API_KEY", "console.groq.com"),
    openrouter=("OPENROUTER_API_KEY", "openrouter.ai"),
)

# ---------------------------------------------------------------------------
# Embedding
#   EMBEDDING_OPTIONS maps a human-readable label to a model identifier in
#   "org/name" form; EMBEDDING_MODEL is the identifier of the default entry.
# ---------------------------------------------------------------------------
EMBEDDING_OPTIONS: dict[str, str] = {
    "EmbeddingGemma 300M (active)": "google/embeddinggemma-300m",
    "BGE Large EN v1.5": "BAAI/bge-large-en-v1.5",
    "Multilingual E5 Large": "intfloat/multilingual-e5-large",
}
DEFAULT_EMBEDDING: str = "EmbeddingGemma 300M (active)"
# Resolved once at import time from the default label above.
EMBEDDING_MODEL: str = EMBEDDING_OPTIONS[DEFAULT_EMBEDDING]

# ---------------------------------------------------------------------------
# RAG
#   Chunking and retrieval settings.
#   NOTE(review): the unit of CHUNK_SIZE / CHUNK_OVERLAP (characters vs.
#   tokens) depends on the splitter that consumes them — confirm there.
# ---------------------------------------------------------------------------
CHUNK_SIZE: int = 1000
CHUNK_OVERLAP: int = 150
RETRIEVAL_K: int = 6  # final number of chunks passed to the LLM
USE_HYBRID_SEARCH: bool = True  # BM25 + semantic ensemble (fused with RRF)

# ---------------------------------------------------------------------------
# Reranking (2-stage retrieval)
#   Stage 1: hybrid retrieval (semantic + BM25) fetches RETRIEVAL_FETCH_K
#            candidates, merged with Reciprocal Rank Fusion (RRF).
#   Stage 2: a cross-encoder reranker scores each (query, chunk) pair jointly
#            and the top RETRIEVAL_K are kept. Highest-ROI precision boost.
# ---------------------------------------------------------------------------
USE_RERANKER: bool = True
RERANKER_MODEL: str = "BAAI/bge-reranker-v2-m3"  # multilingual (handles ID queries)
RETRIEVAL_FETCH_K: int = 20  # candidates retrieved before reranking
RRF_K: int = 60  # RRF damping constant (standard default)

# ---------------------------------------------------------------------------
# Query rewriting (multi-query expansion)
#   The user's phrasing is rarely the best retrieval query. An LLM generates
#   alternative phrasings; each one is retrieved and the results are fused
#   with RRF before reranking, which lifts recall on differently-worded
#   questions.
# ---------------------------------------------------------------------------
# Off by default: the RAGAS ablation (see notebooks/rag_evaluation.ipynb)
# showed multi-query rewriting slightly *hurt* recall on this small, focused
# corpus. The feature stays implemented and measured; set to True for
# larger/noisier corpora.
USE_QUERY_REWRITE: bool = False
QUERY_REWRITE_MODEL: str = "gemini-3.1-flash-lite"  # fast/cheap, via OpenAI-compat endpoint
N_QUERY_VARIANTS: int = 3  # total queries incl. the original

# ---------------------------------------------------------------------------
# Corrective RAG + abstention
#   If the best retrieval score falls below the threshold, the retrieved
#   context is considered too weak and the system abstains instead of
#   answering from poor evidence (calibrated hallucination guard for
#   out-of-corpus questions).
# ---------------------------------------------------------------------------
USE_CORRECTIVE_RAG: bool = True
# The gated score is the top semantic cosine similarity, not the reranker
# score: off-corpus questions score ~0.0 while in-corpus ones score >=~0.2, so
# cosine separates them cleanly, whereas the reranker's sigmoid sits near 0.5
# for both and is unsuitable as an abstain signal.
CRAG_ABSTAIN_THRESHOLD: float = 0.12

# Cap on the number of conversation *turns* kept in the LLM history, where one
# turn is one user message plus one assistant message. Budget behind the value:
# a RAG turn adds roughly 7,000 tokens (6 chunks + question + answer), so
# 4 turns come to about 28K tokens, under the 32K limit of Gemma/Qwen3.
# NOTE(review): by these figures only ~4K of a 32K window remains for the
# system prompt and the new question + context — confirm that is enough.
MAX_HISTORY_TURNS: int = 4

# ---------------------------------------------------------------------------
# Knowledge base sources (Project Gutenberg)
#   SOURCES is a list of dicts with the keys "philosopher", "title" and
#   "gutenberg_id", in the order of the table below.
# ---------------------------------------------------------------------------
# One row per work: (philosopher, title, Project Gutenberg id).
_GUTENBERG_WORKS: list[tuple[str, str, int]] = [
    ("Nietzsche", "Thus Spoke Zarathustra", 1998),
    ("Nietzsche", "Beyond Good and Evil", 4363),
    ("Nietzsche", "On the Genealogy of Morality", 52319),
    ("Nietzsche", "The Birth of Tragedy", 51356),
    ("Schopenhauer", "Essays of Arthur Schopenhauer", 11945),
    ("Hume", "An Enquiry Concerning Human Understanding", 9662),
    ("Russell", "The Problems of Philosophy", 5827),
    ("Marcus Aurelius", "Meditations", 2680),
    ("Plato", "The Republic", 1497),
    ("Mill", "Utilitarianism", 11224),
    ("Epictetus", "The Enchiridion", 45109),
    ("Kant", "Fundamental Principles of the Metaphysic of Morals", 5682),
]

SOURCES = [
    {"philosopher": philosopher, "title": title, "gutenberg_id": gutenberg_id}
    for philosopher, title, gutenberg_id in _GUTENBERG_WORKS
]