# ====================================================
# searchschool.py — Village Enabled
# ====================================================

import pandas as pd
from huggingface_hub import hf_hub_download
from rapidfuzz import process, fuzz
from web_search import tavily_search_codes

# ====================================================
# CONFIG: columns + HF dataset
# ====================================================
# Column names of the master spreadsheet; used to index and filter master_df.
MASTER_SCHOOL_COL   = "School_Name__c"
MASTER_DISTRICT_COL = "School_District__c"
MASTER_BLOCK_COL    = "School_Block__c"
MASTER_VILLAGE_COL  = "School_Village__c"
MASTER_UDISE_COL    = "School_Udise_Code__c"
MASTER_STATE_COL    = "School_State__c"

# Dataset repo id and file name passed to hf_hub_download when loading the master.
HF_SCHOOLS_DATASET = "Apf-AI4Good/Schools"
MASTER_ALL_STATES_FILE = "master_all_states.xlsx"

# State key handed to the name normalizer when search_candidates gets no state.
DEFAULT_STATE_KEY = "ARUNACHAL PRADESH"
# Upper bound on the number of fuzzy matches returned by search_candidates.
MAX_CANDIDATES = 5

# global cache: the master DataFrame, populated on first use by
# _load_master_if_needed() and reused afterwards (None until then).
master_df = None

# Normalization helper, used as the rapidfuzz processor in search_candidates.
# The import is best-effort: if it fails for any reason at module load time,
# the name is bound to None and search_candidates retries the import on its
# first call.
try:
    from admin_patterns import normalize_with_patterns_dynamic
except Exception:
    normalize_with_patterns_dynamic = None


# ====================================================
# INTERNAL: load master once
# ====================================================
def _load_master_if_needed():
    """Populate the module-level ``master_df`` cache on first use.

    Does nothing when ``master_df`` is already set. Otherwise fetches
    ``MASTER_ALL_STATES_FILE`` from the ``HF_SCHOOLS_DATASET`` dataset repo
    via ``hf_hub_download``, reads it as an Excel sheet with every column
    typed as ``str``, and replaces missing cells with empty strings.
    """
    global master_df

    if master_df is None:
        xlsx_path = hf_hub_download(
            repo_id=HF_SCHOOLS_DATASET,
            repo_type="dataset",
            filename=MASTER_ALL_STATES_FILE,
        )
        # dtype=str keeps every cell textual (no numeric coercion on read).
        raw_frame = pd.read_excel(xlsx_path, dtype=str)
        master_df = raw_frame.fillna("")


# ====================================================
# WEB SEARCH → UDISE → MASTER LOOKUP
# ====================================================
def on_search_web(
    school_name: str,
    state_name: str,
    district: str | None = None,
    block: str | None = None,
    village: str | None = None,
):
    """
    Find schools via web search and resolve them against the master list.

    1. ``tavily_search_codes`` is called with the school name, state and
       district and yields a list of UDISE codes.
    2. Those codes are looked up in the master through
       ``get_school_rows_by_udise`` (state/district/block/village are
       forwarded as filters).
    3. The result is returned as a DataFrame with exactly the columns
       School_Name, State, District, Block, Village, UDISE_Code.

    An empty DataFrame with those columns is returned when the web search
    produces no codes.
    """
    output_columns = [
        "School_Name", "State",
        "District", "Block", "Village",
        "UDISE_Code"
    ]

    found_codes = tavily_search_codes(
        school_name=school_name,
        state_name=state_name,
        district=district,
        api_key=None,
        enforce_state_prefix=True,
    )
    if not found_codes:
        return pd.DataFrame(columns=output_columns)

    matched_rows = get_school_rows_by_udise(
        state_name, found_codes, district, block, village
    )
    result = pd.DataFrame(matched_rows)

    # Guarantee the full column set even when no master row matched.
    absent = [name for name in output_columns if name not in result.columns]
    for name in absent:
        result[name] = None

    return result[output_columns]


def get_school_rows_by_udise(
    state_name: str,
    udise_codes: list[str],
    district: str | None = None,
    block: str | None = None,
    village: str | None = None,
):
    if not udise_codes:
        return []

    _load_master_if_needed()

    df = master_df
    udise_codes = {str(u) for u in udise_codes}

    df = df[df[MASTER_UDISE_COL].isin(udise_codes)]

    if state_name:
        df = df[df[MASTER_STATE_COL].str.upper() == state_name.upper()]

    if district:
        df = df[df[MASTER_DISTRICT_COL] == district]

    if block:
        df = df[df[MASTER_BLOCK_COL] == block]

    if village and MASTER_VILLAGE_COL in df.columns:
        df = df[df[MASTER_VILLAGE_COL] == village]

    rows = []
    for _, r in df.iterrows():
        rows.append({
            "School_Name": r.get(MASTER_SCHOOL_COL, ""),
            "State": r.get(MASTER_STATE_COL, ""),
            "District": r.get(MASTER_DISTRICT_COL, ""),
            "Block": r.get(MASTER_BLOCK_COL, ""),
            "Village": r.get(MASTER_VILLAGE_COL, ""),
            "UDISE_Code": r.get(MASTER_UDISE_COL, ""),
        })

    return rows


# ====================================================
# RAPIDFUZZ SEARCH (Village-aware)
# ====================================================
def search_candidates(
    query_name: str,
    state: str | None,
    district: str | None,
    block: str | None,
    village: str | None = None,
):
    global normalize_with_patterns_dynamic

    if normalize_with_patterns_dynamic is None:
        from admin_patterns import normalize_with_patterns_dynamic

    if not query_name:
        return pd.DataFrame(), pd.DataFrame()

    _load_master_if_needed()

    df = master_df

    # -------- Filters --------
    if state:
        df = df[df[MASTER_STATE_COL].str.upper() == state.upper()]

    if district:
        df = df[df[MASTER_DISTRICT_COL] == district]

    if block:
        df = df[df[MASTER_BLOCK_COL] == block]

    if village and MASTER_VILLAGE_COL in df.columns:
        df = df[df[MASTER_VILLAGE_COL] == village]

    if df.empty:
        return pd.DataFrame(), pd.DataFrame()

    state_for_patterns = (state or DEFAULT_STATE_KEY).upper()

    choices = df[MASTER_SCHOOL_COL].astype(str)

    candidates_raw = process.extract(
        query_name,
        choices,
        scorer=fuzz.token_set_ratio,
        processor=lambda s: normalize_with_patterns_dynamic(
            s, state_for_patterns
        ),
        limit=MAX_CANDIDATES,
    )

    rows = []
    for choice_name, score, idx in candidates_raw:
        r = df.loc[idx]
        rows.append({
            "School_Name": r.get(MASTER_SCHOOL_COL, ""),
            "State": r.get(MASTER_STATE_COL, ""),
            "District": r.get(MASTER_DISTRICT_COL, ""),
            "Block": r.get(MASTER_BLOCK_COL, ""),
            "Village": r.get(MASTER_VILLAGE_COL, ""),
            "UDISE_Code": r.get(MASTER_UDISE_COL, ""),
            "Score": score,
        })

    candidates_df = pd.DataFrame(rows)
    best_df = candidates_df.head(1).copy()

    return candidates_df, best_df