Spaces:
Running
Running
| # ==================================================== | |
| # app.py — State School Fuzzy Matcher (Village Enabled) | |
| # ==================================================== | |
| import os | |
| import json | |
| import pandas as pd | |
| import gradio as gr | |
| from functools import lru_cache | |
| from huggingface_hub import hf_hub_download | |
| # ---------------------------------------------------- | |
| # External search logic | |
| # ---------------------------------------------------- | |
| from searchschool import ( | |
| search_candidates, | |
| on_search_web, | |
| ) | |
| # ---------------------------------------------------- | |
| # CONFIG | |
| # ---------------------------------------------------- | |
# Hugging Face dataset repository and the Parquet file inside it that
# holds the State / District / Block / Village rows.
HF_SCHOOLS_DATASET = "Apf-AI4Good/Schools"
STATE_HIERARCHY_PARQUET = "state_district_block_villages.parquet"

# Password compared against user input when unlocking web search.
# Read from the environment; stays None when the variable is absent.
ADMIN_PASSWORD = os.environ.get("ADMIN_PASSWORD")
if ADMIN_PASSWORD is None or ADMIN_PASSWORD == "":
    print("β ADMIN_PASSWORD not set")
| # ---------------------------------------------------- | |
| # Load hierarchy ONCE from Hugging Face (Parquet) | |
| # ---------------------------------------------------- | |
def load_state_hierarchy():
    """Download the hierarchy Parquet file and build a nested lookup.

    Returns:
        dict: ``{state: {district: {block: [village, ...]}}}`` where each
        village list is sorted and free of duplicates.

    Raises:
        ValueError: if the Parquet file lacks any of the State, District,
            Block or Village columns.
    """
    parquet_path = hf_hub_download(
        repo_id=HF_SCHOOLS_DATASET,
        repo_type="dataset",
        filename=STATE_HIERARCHY_PARQUET,
    )
    df = pd.read_parquet(parquet_path)

    levels = ["State", "District", "Block", "Village"]
    missing = set(levels) - set(df.columns)
    if missing:
        raise ValueError(f"Missing columns in parquet: {missing}")

    # A row without State/District/Block cannot be placed in the tree, and
    # a NaN/None key would later break sorted() on the keys. Duplicates are
    # dropped up front so the Python-level loop touches each path once.
    rows = (
        df[levels]
        .dropna(subset=["State", "District", "Block"])
        .drop_duplicates()
    )

    hier = {}
    # itertuples yields plain tuples in `levels` order and avoids the
    # per-row Series construction that iterrows performs.
    for state, district, block, village in rows.itertuples(index=False, name=None):
        villages = (
            hier.setdefault(state, {})
            .setdefault(district, {})
            .setdefault(block, set())
        )
        # Keep the block even when its village is missing, but never store
        # NaN/None: mixing it with strings would make sorted() raise.
        if not pd.isna(village):
            villages.add(village)

    # Convert sets → sorted lists
    return {
        state: {
            district: {
                block: sorted(villages)
                for block, villages in blocks.items()
            }
            for district, blocks in districts.items()
        }
        for state, districts in hier.items()
    }
# Built once at import time; the dropdown handlers read from STATE_HIER.
STATE_HIER = load_state_hierarchy()
# Iterating a dict yields its keys, so this is the sorted list of states.
STATE_CHOICES = sorted(STATE_HIER)
| # ---------------------------------------------------- | |
| # UI RENDER HELPERS | |
| # ---------------------------------------------------- | |
def render_results_md(df, show_score=True):
    """Render search results as a Markdown table.

    Args:
        df: DataFrame of matches, or None. The columns School_Name,
            UDISE_Code, State, District, Block, Village (and Score when
            ``show_score`` is true) are read; absent columns render blank.
        show_score: when true, append a "Score" column formatted to two
            decimal places.

    Returns:
        str: the Markdown table (each line newline-terminated), or a
        "no results" message when ``df`` is None or empty.
    """
    if df is None or df.empty:
        return "β No results found."

    def cell(value):
        # Missing values become blank instead of the literal "nan"/"None".
        text = "" if pd.isna(value) else str(value)
        # A raw "|" or a line break inside a cell would split the table
        # row, so flatten line breaks and backslash-escape pipes.
        return " ".join(text.splitlines()).replace("|", "\\|")

    def score_cell(value):
        # A missing Score column yields "", which float() rejects; treat
        # anything non-numeric or NaN as a blank cell rather than crashing.
        try:
            number = float(value)
        except (TypeError, ValueError):
            return ""
        return "" if pd.isna(number) else f"{number:.2f}"

    columns = [
        ("School Name", "School_Name"),
        ("UDISE Code", "UDISE_Code"),
        ("State", "State"),
        ("District", "District"),
        ("Block", "Block"),
        ("Village", "Village"),
    ]
    headers = [title for title, _ in columns]
    if show_score:
        headers.append("Score")

    lines = [
        "| " + " | ".join(headers) + " |",
        "| " + " | ".join(["---"] * len(headers)) + " |",
    ]
    for _, r in df.iterrows():
        row = [cell(r.get(key, "")) for _, key in columns]
        if show_score:
            row.append(score_cell(r.get("Score", "")))
        lines.append("| " + " | ".join(row) + " |")

    # Single join instead of repeated string concatenation.
    return "\n".join(lines) + "\n"
def render_best_md(df):
    """Render the first row of ``df`` as a one-row Markdown table.

    Args:
        df: DataFrame whose first row is the best match, or None. The
            columns School_Name, UDISE_Code, State, District, Block,
            Village and Score are read; absent text columns render blank
            and an absent Score column renders as ``0.00``.

    Returns:
        str: the Markdown table (each line newline-terminated), or a
        "no confident match" message when ``df`` is None or empty.
    """
    if df is None or df.empty:
        return "β No confident best match found."

    def cell(value):
        # Missing values become blank instead of the literal "nan"/"None".
        text = "" if pd.isna(value) else str(value)
        # A raw "|" or a line break inside a cell would split the table
        # row, so flatten line breaks and backslash-escape pipes.
        return " ".join(text.splitlines()).replace("|", "\\|")

    def score_cell(value):
        # None, NaN or non-numeric scores render blank instead of raising
        # from float() or printing "nan".
        try:
            number = float(value)
        except (TypeError, ValueError):
            return ""
        return "" if pd.isna(number) else f"{number:.2f}"

    headers = [
        "School Name", "UDISE Code",
        "State", "District", "Block", "Village", "Score",
    ]
    keys = ["School_Name", "UDISE_Code", "State", "District", "Block", "Village"]

    best = df.iloc[0]
    row = [cell(best.get(key, "")) for key in keys]
    row.append(score_cell(best.get("Score", 0)))

    lines = [
        "| " + " | ".join(headers) + " |",
        "| " + " | ".join(["---"] * len(headers)) + " |",
        "| " + " | ".join(row) + " |",
    ]
    return "\n".join(lines) + "\n"
| # ---------------------------------------------------- | |
| # Cascading dropdown handlers | |
| # ---------------------------------------------------- | |
def on_state_change(state):
    """Refresh the District dropdown and clear Block and Village.

    Args:
        state: the selected state, or a falsy value when cleared.

    Returns:
        tuple: three ``gr.update`` objects for the District, Block and
        Village dropdowns, in that order. All selections are reset.
    """
    # .get() instead of indexing: a value that is not a key of STATE_HIER
    # yields an empty District list rather than a KeyError in the UI.
    districts = sorted(STATE_HIER.get(state, {})) if state else []
    return (
        gr.update(choices=districts, value=None),
        gr.update(choices=[], value=None),
        gr.update(choices=[], value=None),
    )
def on_district_change(state, district):
    """Refresh the Block dropdown and clear Village.

    Args:
        state: the selected state, or a falsy value.
        district: the selected district, or a falsy value.

    Returns:
        tuple: two ``gr.update`` objects for the Block and Village
        dropdowns, in that order. Both selections are reset.
    """
    blocks = []
    if state and district:
        # Chained .get(): a state/district pair that is not in STATE_HIER
        # (e.g. a district left over from a previous state) yields an
        # empty Block list instead of raising KeyError.
        blocks = sorted(STATE_HIER.get(state, {}).get(district, {}))
    return (
        gr.update(choices=blocks, value=None),
        gr.update(choices=[], value=None),
    )
def on_block_change(state, district, block):
    """Refresh the Village dropdown for the selected block.

    Args:
        state: the selected state, or a falsy value.
        district: the selected district, or a falsy value.
        block: the selected block, or a falsy value.

    Returns:
        A ``gr.update`` for the Village dropdown with its selection reset.
    """
    villages = []
    if state and district and block:
        # Chained .get(): a path that is not in STATE_HIER yields an empty
        # Village list instead of raising KeyError.
        villages = STATE_HIER.get(state, {}).get(district, {}).get(block, [])
    # Pass a copy so the shared hierarchy list is never handed to the UI.
    return gr.update(choices=list(villages), value=None)
| # ---------------------------------------------------- | |
| # Search wrappers | |
| # ---------------------------------------------------- | |
def rapid_search_and_render(name, state, district, block, village):
    """Run the rapid search and return Markdown for both result panels.

    Passes the school name and location filters to ``search_candidates``
    and renders its two results.

    Returns:
        tuple: (Markdown for all candidates with scores, Markdown for the
        best match).
    """
    location = {
        "state": state,
        "district": district,
        "block": block,
        "village": village,
    }
    candidates, best = search_candidates(query_name=name, **location)

    candidates_md = render_results_md(candidates, show_score=True)
    best_md = render_best_md(best)
    return candidates_md, best_md
def web_search_and_render(name, state, district, block, village):
    """Run the web search and return its results as Markdown.

    Passes the school name and location filters to ``on_search_web`` and
    renders the result without a score column.
    """
    web_hits = on_search_web(
        school_name=name,
        state_name=state,
        district=district,
        block=block,
        village=village,
    )
    table_md = render_results_md(web_hits, show_score=False)
    return table_md
| # ---------------------------------------------------- | |
| # Admin unlock | |
| # ---------------------------------------------------- | |
def unlock_web_search(pwd):
    """Check the admin password and enable or disable the web-search button.

    Args:
        pwd: the password typed by the user (may be empty or None).

    Returns:
        tuple: (``gr.update`` toggling the button's ``interactive`` flag,
        status message string).
    """
    import hmac  # local import keeps this block self-contained

    supplied = (pwd or "").encode("utf-8")
    expected = (ADMIN_PASSWORD or "").encode("utf-8")

    # An unset or empty ADMIN_PASSWORD must never unlock: with a plain
    # `==`, an empty configured password matched an empty input.
    # compare_digest makes the comparison constant-time; the values are
    # encoded to bytes because it rejects non-ASCII str arguments.
    if expected and hmac.compare_digest(supplied, expected):
        return gr.update(interactive=True), "π Web search unlocked."
    return gr.update(interactive=False), "β Invalid password."
| # ---------------------------------------------------- | |
| # Gradio App | |
| # ---------------------------------------------------- | |
with gr.Blocks(title="State School Fuzzy Matcher") as demo:
    gr.Markdown("# Find School UDISE Code")

    # ---------- Inputs ----------
    # Only the State dropdown starts with choices; the change handlers
    # wired below fill in District, Block and Village.
    with gr.Row():
        state_dd = gr.Dropdown(label="State", choices=STATE_CHOICES, value=None)
        district_dd = gr.Dropdown(label="District", interactive=True, value=None)

    with gr.Row():
        block_dd = gr.Dropdown(label="Block", interactive=True, value=None)
        village_dd = gr.Dropdown(label="Village", interactive=True, value=None)

    school_input = gr.Textbox(
        label="School Name",
        placeholder="Enter school name from marksheet",
    )
    search_btn = gr.Button("π Find Schools")

    # ---------- Rapid Search ----------
    gr.Markdown("### π Rapid Search Results")
    rapid_results_md = gr.Markdown()
    gr.Markdown("### β Best Rapid Match")
    best_result_md = gr.Markdown()

    # ---------- Web Search ----------
    # NOTE(review): the lock is the button's `interactive` flag only;
    # confirm whether the handler can still be reached some other way.
    gr.Markdown("### π Web Search (Locked)")
    unlock_pwd = gr.Textbox(label="Admin Password", type="password")
    unlock_btn = gr.Button("Unlock Web Search")
    web_status = gr.Markdown("")
    search_web_btn = gr.Button("Search Web", interactive=False)
    web_results_md = gr.Markdown()

    # ---------- Wiring ----------
    # Each dropdown refreshes (and resets) the levels below it.
    state_dd.change(
        fn=on_state_change,
        inputs=state_dd,
        outputs=[district_dd, block_dd, village_dd],
    )
    district_dd.change(
        fn=on_district_change,
        inputs=[state_dd, district_dd],
        outputs=[block_dd, village_dd],
    )
    block_dd.change(
        fn=on_block_change,
        inputs=[state_dd, district_dd, block_dd],
        outputs=village_dd,
    )

    # Both searches take the school name plus the four location filters.
    search_btn.click(
        fn=rapid_search_and_render,
        inputs=[school_input, state_dd, district_dd, block_dd, village_dd],
        outputs=[rapid_results_md, best_result_md],
    )
    unlock_btn.click(
        fn=unlock_web_search,
        inputs=unlock_pwd,
        outputs=[search_web_btn, web_status],
    )
    search_web_btn.click(
        fn=web_search_and_render,
        inputs=[school_input, state_dd, district_dd, block_dd, village_dd],
        outputs=web_results_md,
    )

# ----------------------------------------------------
# Entry point
# ----------------------------------------------------
# Launch the UI only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()