# ====================================================
# app.py — State School Fuzzy Matcher (Village Enabled)
# ====================================================

import hmac
import json
import os
from functools import lru_cache

import gradio as gr
import pandas as pd
from huggingface_hub import hf_hub_download

# ----------------------------------------------------
# External search logic
# ----------------------------------------------------
from searchschool import (
    search_candidates,
    on_search_web,
)

# ----------------------------------------------------
# CONFIG
# ----------------------------------------------------
HF_SCHOOLS_DATASET = "Apf-AI4Good/Schools"
STATE_HIERARCHY_PARQUET = "state_district_block_villages.parquet"

ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", None)
if not ADMIN_PASSWORD:
    print("⚠ ADMIN_PASSWORD not set")


# ----------------------------------------------------
# Load hierarchy ONCE from Hugging Face (Parquet)
# ----------------------------------------------------
@lru_cache(maxsize=1)
def load_state_hierarchy():
    """Download the hierarchy parquet and build the nested lookup.

    Returns:
        A dict shaped ``{state: {district: {block: [village, ...]}}}``
        where every village list is sorted.

    Raises:
        ValueError: if the parquet lacks any of the State / District /
            Block / Village columns.
    """
    parquet_path = hf_hub_download(
        repo_id=HF_SCHOOLS_DATASET,
        repo_type="dataset",
        filename=STATE_HIERARCHY_PARQUET,
    )
    df = pd.read_parquet(parquet_path)

    required_cols = {"State", "District", "Block", "Village"}
    missing = required_cols - set(df.columns)
    if missing:
        raise ValueError(f"Missing columns in parquet: {missing}")

    # A row without a State/District/Block cannot be placed in the cascade,
    # and a null key would make the later sorted() calls raise TypeError.
    levels = df[["State", "District", "Block", "Village"]].dropna(
        subset=["State", "District", "Block"]
    )

    hier = {}
    for state, district, block, village in levels.itertuples(
        index=False, name=None
    ):
        villages = (
            hier.setdefault(state, {})
            .setdefault(district, {})
            .setdefault(block, set())
        )
        # Keep the block even when the village cell is empty.
        if pd.notna(village):
            villages.add(village)

    # Convert sets → sorted lists
    for districts in hier.values():
        for blocks in districts.values():
            for block, villages in blocks.items():
                blocks[block] = sorted(villages)

    return hier


STATE_HIER = load_state_hierarchy()
STATE_CHOICES = sorted(STATE_HIER.keys())


# ----------------------------------------------------
# UI RENDER HELPERS
# ----------------------------------------------------
_RESULT_FIELDS = (
    "School_Name",
    "UDISE_Code",
    "State",
    "District",
    "Block",
    "Village",
)
_RESULT_HEADERS = [
    "School Name",
    "UDISE Code",
    "State",
    "District",
    "Block",
    "Village",
]


def _md_cell(value):
    """Return *value* as text that is safe inside one markdown table cell."""
    if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
        return ""
    # An unescaped pipe or a line break would split the row apart.
    return (
        str(value)
        .replace("|", "\\|")
        .replace("\r", " ")
        .replace("\n", " ")
    )


def _format_score(score):
    """Format *score* with two decimals; blank when missing or non-numeric."""
    try:
        value = float(score)
    except (TypeError, ValueError):
        return ""
    return "" if pd.isna(value) else f"{value:.2f}"


def _md_table(headers, rows):
    """Assemble a markdown table from header labels and rows of cell text."""
    lines = [
        "| " + " | ".join(headers) + " |",
        "| " + " | ".join(["---"] * len(headers)) + " |",
    ]
    lines.extend("| " + " | ".join(row) + " |" for row in rows)
    return "\n".join(lines) + "\n"


def render_results_md(df, show_score=True):
    """Render every row of *df* as a markdown table.

    Args:
        df: DataFrame of matches, or None. Columns that are absent render
            as empty cells.
        show_score: when true, append a "Score" column with two decimals.

    Returns:
        The markdown table, or a "no results" message when *df* is None
        or empty.
    """
    if df is None or df.empty:
        return "❌ No results found."

    headers = list(_RESULT_HEADERS)
    if show_score:
        headers.append("Score")

    rows = []
    for _, r in df.iterrows():
        row = [_md_cell(r.get(field, "")) for field in _RESULT_FIELDS]
        if show_score:
            row.append(_format_score(r.get("Score", "")))
        rows.append(row)

    return _md_table(headers, rows)


def render_best_md(df):
    """Render the first row of *df* as a one-row markdown table with score.

    Args:
        df: DataFrame whose first row is the best match, or None.

    Returns:
        The markdown table, or a "no confident best match" message when
        *df* is None or empty. A missing Score column renders as "0.00".
    """
    if df is None or df.empty:
        return "❌ No confident best match found."

    r = df.iloc[0]
    row = [_md_cell(r.get(field, "")) for field in _RESULT_FIELDS]
    row.append(_format_score(r.get("Score", 0)))

    return _md_table(_RESULT_HEADERS + ["Score"], [row])


# ----------------------------------------------------
# Cascading dropdown handlers
# ----------------------------------------------------
def on_state_change(state):
    """Refill the District dropdown and clear Block and Village.

    Returns:
        Updates for (district, block, village). A state that is empty or
        not in the hierarchy yields empty choices instead of a KeyError.
    """
    districts = sorted(STATE_HIER.get(state, {})) if state else []
    return (
        gr.update(choices=districts, value=None),
        gr.update(choices=[], value=None),
        gr.update(choices=[], value=None),
    )


def on_district_change(state, district):
    """Refill the Block dropdown and clear Village.

    Returns:
        Updates for (block, village). An incomplete or unknown selection
        yields empty choices instead of a KeyError.
    """
    blocks = []
    if state and district:
        blocks = sorted(STATE_HIER.get(state, {}).get(district, {}))
    return (
        gr.update(choices=blocks, value=None),
        gr.update(choices=[], value=None),
    )


def on_block_change(state, district, block):
    """Refill the Village dropdown for the selected block.

    Returns:
        A single update for the village dropdown. An incomplete or unknown
        selection yields empty choices instead of a KeyError.
    """
    villages = []
    if state and district and block:
        villages = (
            STATE_HIER.get(state, {}).get(district, {}).get(block, [])
        )
    return gr.update(choices=villages, value=None)


# ----------------------------------------------------
# Search wrappers
# ----------------------------------------------------
def rapid_search_and_render(name, state, district, block, village):
    """Run the rapid search and render both result panels.

    Returns:
        (all candidates as markdown with scores, best match as markdown).
    """
    # NOTE(review): search_candidates is assumed to return a pair of
    # DataFrames (candidates, best) — confirm against searchschool.
    candidates, best = search_candidates(
        query_name=name,
        state=state,
        district=district,
        block=block,
        village=village,
    )
    return (
        render_results_md(candidates, show_score=True),
        render_best_md(best),
    )


def web_search_and_render(name, state, district, block, village):
    """Run the web search and render its results without a score column."""
    # NOTE(review): this handler does not check the admin password itself;
    # the lock is only the disabled button in the UI — confirm that is
    # acceptable for the deployment.
    results = on_search_web(
        school_name=name,
        state_name=state,
        district=district,
        block=block,
        village=village,
    )
    return render_results_md(results, show_score=False)


# ----------------------------------------------------
# Admin unlock
# ----------------------------------------------------
def unlock_web_search(pwd):
    """Enable the web-search button when *pwd* matches ADMIN_PASSWORD.

    Returns:
        (update for the web-search button, status message).
    """
    # With no configured password (unset or empty) nothing may unlock;
    # otherwise an empty submission would equal an empty ADMIN_PASSWORD.
    if ADMIN_PASSWORD and pwd:
        # Compare as bytes: compare_digest rejects non-ASCII str, and it
        # avoids leaking the match position through timing.
        supplied = str(pwd).encode("utf-8")
        expected = ADMIN_PASSWORD.encode("utf-8")
        if hmac.compare_digest(supplied, expected):
            return gr.update(interactive=True), "🔓 Web search unlocked."
    return gr.update(interactive=False), "❌ Invalid password."
# ----------------------------------------------------
# Gradio App
# ----------------------------------------------------
with gr.Blocks(title="State School Fuzzy Matcher") as demo:
    gr.Markdown("# Find School UDISE Code")

    # ---------- Inputs ----------
    # Location cascade: State → District → Block → Village.
    with gr.Row():
        state_dd = gr.Dropdown(
            label="State",
            choices=STATE_CHOICES,
            value=None,
        )
        district_dd = gr.Dropdown(
            label="District",
            interactive=True,
            value=None,
        )

    with gr.Row():
        block_dd = gr.Dropdown(
            label="Block",
            interactive=True,
            value=None,
        )
        village_dd = gr.Dropdown(
            label="Village",
            interactive=True,
            value=None,
        )

    school_input = gr.Textbox(
        label="School Name",
        placeholder="Enter school name from marksheet",
    )
    search_btn = gr.Button("🔍 Find Schools")

    # ---------- Rapid Search ----------
    gr.Markdown("### 🔎 Rapid Search Results")
    rapid_results_md = gr.Markdown()

    gr.Markdown("### ⭐ Best Rapid Match")
    best_result_md = gr.Markdown()

    # ---------- Web Search ----------
    # The search button starts disabled; the unlock handler enables it.
    gr.Markdown("### 🌐 Web Search (Locked)")
    unlock_pwd = gr.Textbox(
        label="Admin Password",
        type="password",
    )
    unlock_btn = gr.Button("Unlock Web Search")
    web_status = gr.Markdown("")

    search_web_btn = gr.Button("Search Web", interactive=False)
    web_results_md = gr.Markdown()

    # ---------- Wiring ----------
    # Both searches read the same five inputs, in this order.
    search_inputs = [
        school_input,
        state_dd,
        district_dd,
        block_dd,
        village_dd,
    ]

    # Each level of the cascade resets the levels beneath it.
    state_dd.change(
        fn=on_state_change,
        inputs=state_dd,
        outputs=[district_dd, block_dd, village_dd],
    )
    district_dd.change(
        fn=on_district_change,
        inputs=[state_dd, district_dd],
        outputs=[block_dd, village_dd],
    )
    block_dd.change(
        fn=on_block_change,
        inputs=[state_dd, district_dd, block_dd],
        outputs=village_dd,
    )

    search_btn.click(
        fn=rapid_search_and_render,
        inputs=search_inputs,
        outputs=[rapid_results_md, best_result_md],
    )

    unlock_btn.click(
        fn=unlock_web_search,
        inputs=unlock_pwd,
        outputs=[search_web_btn, web_status],
    )

    search_web_btn.click(
        fn=web_search_and_render,
        inputs=search_inputs,
        outputs=web_results_md,
    )

# ----------------------------------------------------
# Entry point
# ----------------------------------------------------
if __name__ == "__main__":
    demo.launch()