gkdivya's picture
Update app.py
b0894d3 verified
# ====================================================
# app.py — State School Fuzzy Matcher (Village Enabled)
# ====================================================
import hmac
import json
import os
from functools import lru_cache

import gradio as gr
import pandas as pd
from huggingface_hub import hf_hub_download
# ----------------------------------------------------
# External search logic
# ----------------------------------------------------
from searchschool import (
search_candidates,
on_search_web,
)
# ----------------------------------------------------
# CONFIG
# ----------------------------------------------------
# Hugging Face dataset repository and the parquet file inside it that holds
# the State / District / Block / Village hierarchy.
HF_SCHOOLS_DATASET = "Apf-AI4Good/Schools"
STATE_HIERARCHY_PARQUET = "state_district_block_villages.parquet"

# Password that unlocks the web search; read from the environment so it never
# lives in the source. os.getenv already returns None when the variable is
# unset, so no explicit default is needed.
ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD")
if not ADMIN_PASSWORD:
    # Covers both "unset" and "set to an empty string".
    print("⚠ ADMIN_PASSWORD not set")
# ----------------------------------------------------
# Load hierarchy ONCE from Hugging Face (Parquet)
# ----------------------------------------------------
@lru_cache(maxsize=1)
def load_state_hierarchy():
    """Download the hierarchy parquet and return it as nested dictionaries.

    The result has the shape
    ``{state: {district: {block: [village, ...]}}}`` with every village list
    sorted. ``lru_cache`` keeps the single result, so the download and parse
    happen at most once per process.

    Returns:
        dict: The nested State -> District -> Block -> villages mapping.

    Raises:
        ValueError: If the parquet lacks any of the State, District, Block
            or Village columns.
    """
    parquet_path = hf_hub_download(
        repo_id=HF_SCHOOLS_DATASET,
        repo_type="dataset",
        filename=STATE_HIERARCHY_PARQUET,
    )
    df = pd.read_parquet(parquet_path)

    required_cols = ["State", "District", "Block", "Village"]
    missing = set(required_cols) - set(df.columns)
    if missing:
        raise ValueError(f"Missing columns in parquet: {missing}")

    # Rows with a missing level are dropped: a NaN would otherwise become a
    # dict key / set member and make the sorted() calls fail when compared
    # against strings (or show up as a bogus dropdown option).
    levels = df[required_cols].dropna()

    hier = {}
    # Plain tuples avoid building a Series per row, which iterrows() does.
    for state, district, block, village in levels.itertuples(index=False, name=None):
        (
            hier.setdefault(state, {})
            .setdefault(district, {})
            .setdefault(block, set())
            .add(village)
        )

    # Convert sets → sorted lists (only values of existing keys are replaced,
    # so mutating while iterating is safe).
    for districts in hier.values():
        for blocks in districts.values():
            for block, villages in blocks.items():
                blocks[block] = sorted(villages)
    return hier
# Materialise the hierarchy once at import time; the dropdown handlers read
# this module-level mapping. The State dropdown offers its top-level keys.
STATE_HIER = load_state_hierarchy()
STATE_CHOICES = sorted(STATE_HIER)
# ----------------------------------------------------
# UI RENDER HELPERS
# ----------------------------------------------------
# (table header, DataFrame column) pairs shared by both result tables.
_RESULT_COLUMNS = (
    ("School Name", "School_Name"),
    ("UDISE Code", "UDISE_Code"),
    ("State", "State"),
    ("District", "District"),
    ("Block", "Block"),
    ("Village", "Village"),
)


def _is_missing(value):
    """Return True when *value* is None or a scalar pandas treats as NA."""
    return value is None or (pd.api.types.is_scalar(value) and bool(pd.isna(value)))


def _md_cell(value):
    """Return *value* as text that is safe inside a single Markdown table cell."""
    if _is_missing(value):
        return ""
    # A newline would end the table row and a bare pipe would start a new
    # cell, so flatten the former and escape the latter.
    text = " ".join(str(value).splitlines())
    return text.replace("|", "\\|")


def _md_score(value):
    """Format a score with two decimals; missing scores become an empty cell."""
    if _is_missing(value):
        return ""
    try:
        return f"{float(value):.2f}"
    except (TypeError, ValueError):
        # Non-numeric score (including ""): show it as text instead of crashing.
        return _md_cell(value)


def _md_table(rows, *, show_score, score_default):
    """Build the Markdown table for *rows* (objects exposing ``.get``)."""
    headers = [header for header, _ in _RESULT_COLUMNS]
    if show_score:
        headers.append("Score")

    lines = [
        "| " + " | ".join(headers) + " |",
        "| " + " | ".join(["---"] * len(headers)) + " |",
    ]
    for row in rows:
        cells = [_md_cell(row.get(column, "")) for _, column in _RESULT_COLUMNS]
        if show_score:
            cells.append(_md_score(row.get("Score", score_default)))
        lines.append("| " + " | ".join(cells) + " |")
    # Every line, including the last, is newline-terminated.
    return "\n".join(lines) + "\n"


def render_results_md(df, show_score=True):
    """Render every row of *df* as a Markdown table.

    Args:
        df: DataFrame of matches, or None. Columns that are absent render as
            empty cells.
        show_score: When true, append a "Score" column formatted to two
            decimals; a missing or NA score renders as an empty cell.

    Returns:
        str: The Markdown table, or a "no results" message when *df* is None
        or empty.
    """
    if df is None or df.empty:
        return "❌ No results found."
    return _md_table(
        (row for _, row in df.iterrows()),
        show_score=show_score,
        score_default=None,
    )


def render_best_md(df):
    """Render only the first row of *df* as a one-row Markdown table.

    Args:
        df: DataFrame whose first row is the best match, or None.

    Returns:
        str: The Markdown table (always with a "Score" column; an absent
        Score column renders as 0.00), or a "no match" message when *df* is
        None or empty.
    """
    if df is None or df.empty:
        return "❌ No confident best match found."
    return _md_table([df.iloc[0]], show_score=True, score_default=0)
# ----------------------------------------------------
# Cascading dropdown handlers
# ----------------------------------------------------
def on_state_change(state):
    """Repopulate the District dropdown and clear Block and Village.

    Args:
        state: The selected state, or a falsy value when nothing is selected.

    Returns:
        tuple: Updates for the (district, block, village) dropdowns. A state
        that is not in ``STATE_HIER`` yields empty district choices instead
        of raising ``KeyError``.
    """
    districts = sorted(STATE_HIER.get(state, {})) if state else []
    return (
        gr.update(choices=districts, value=None),
        gr.update(choices=[], value=None),
        gr.update(choices=[], value=None),
    )
def on_district_change(state, district):
    """Repopulate the Block dropdown and clear Village.

    Args:
        state: The selected state, or a falsy value.
        district: The selected district, or a falsy value.

    Returns:
        tuple: Updates for the (block, village) dropdowns. A state/district
        pair that is not in ``STATE_HIER`` (for example a district left over
        from a previously selected state) yields empty block choices instead
        of raising ``KeyError``.
    """
    blocks = []
    if state and district:
        blocks = sorted(STATE_HIER.get(state, {}).get(district, {}))
    return (
        gr.update(choices=blocks, value=None),
        gr.update(choices=[], value=None),
    )
def on_block_change(state, district, block):
    """Repopulate the Village dropdown for the selected block.

    Args:
        state: The selected state, or a falsy value.
        district: The selected district, or a falsy value.
        block: The selected block, or a falsy value.

    Returns:
        The update for the village dropdown. A combination that is not in
        ``STATE_HIER`` yields empty choices instead of raising ``KeyError``.
    """
    villages = []
    if state and district and block:
        villages = STATE_HIER.get(state, {}).get(district, {}).get(block, [])
    return gr.update(choices=villages, value=None)
# ----------------------------------------------------
# Search wrappers
# ----------------------------------------------------
def rapid_search_and_render(name, state, district, block, village):
    """Run ``search_candidates`` and render both of its results as Markdown.

    Returns:
        tuple: (candidate table with scores, best-match table).
    """
    location = {
        "state": state,
        "district": district,
        "block": block,
        "village": village,
    }
    candidates, best = search_candidates(query_name=name, **location)
    candidates_md = render_results_md(candidates, show_score=True)
    best_md = render_best_md(best)
    return candidates_md, best_md
def web_search_and_render(name, state, district, block, village):
    """Run ``on_search_web`` and render its result as a score-less table."""
    web_hits = on_search_web(
        school_name=name,
        state_name=state,
        district=district,
        block=block,
        village=village,
    )
    table_md = render_results_md(web_hits, show_score=False)
    return table_md
# ----------------------------------------------------
# Admin unlock
# ----------------------------------------------------
def unlock_web_search(pwd):
    """Enable the web-search button when *pwd* matches ``ADMIN_PASSWORD``.

    Args:
        pwd: The password submitted from the UI.

    Returns:
        tuple: (update for the web-search button, status message).
    """
    # With no password configured (unset or empty) nothing may unlock: a
    # plain equality test would let an empty or None submission match.
    configured = bool(ADMIN_PASSWORD) and isinstance(pwd, str)
    # Compare as bytes because compare_digest rejects non-ASCII str; it is
    # used instead of == to avoid leaking the match position through timing.
    if configured and hmac.compare_digest(
        pwd.encode("utf-8"), ADMIN_PASSWORD.encode("utf-8")
    ):
        return gr.update(interactive=True), "🔓 Web search unlocked."
    return gr.update(interactive=False), "❌ Invalid password."
# ----------------------------------------------------
# Gradio App
# ----------------------------------------------------
# NOTE(review): the nesting below (two dropdowns per row, everything else at
# the top level of the Blocks) is reconstructed — confirm against the
# intended layout.
with gr.Blocks(title="State School Fuzzy Matcher") as demo:
    gr.Markdown("# Find School UDISE Code")

    # ---------- Inputs ----------
    with gr.Row():
        state_dd = gr.Dropdown(
            label="State",
            choices=STATE_CHOICES,
            value=None,
        )
        district_dd = gr.Dropdown(
            label="District",
            interactive=True,
            value=None,
        )
    with gr.Row():
        block_dd = gr.Dropdown(
            label="Block",
            interactive=True,
            value=None,
        )
        village_dd = gr.Dropdown(
            label="Village",
            interactive=True,
            value=None,
        )
    school_input = gr.Textbox(
        label="School Name",
        placeholder="Enter school name from marksheet",
    )
    search_btn = gr.Button("🔍 Find Schools")

    # ---------- Rapid Search ----------
    gr.Markdown("### 🔎 Rapid Search Results")
    rapid_results_md = gr.Markdown()
    gr.Markdown("### ⭐ Best Rapid Match")
    best_result_md = gr.Markdown()

    # ---------- Web Search ----------
    # NOTE(review): the button starts non-interactive and is enabled by
    # unlock_web_search. That only locks the UI — confirm whether the click
    # handler is still reachable through Gradio's API and, if so, enforce the
    # password server-side as well.
    gr.Markdown("### 🌐 Web Search (Locked)")
    unlock_pwd = gr.Textbox(
        label="Admin Password",
        type="password",
    )
    unlock_btn = gr.Button("Unlock Web Search")
    web_status = gr.Markdown("")
    search_web_btn = gr.Button("Search Web", interactive=False)
    web_results_md = gr.Markdown()

    # ---------- Wiring ----------
    # Each dropdown refreshes (and clears) every level below it.
    state_dd.change(
        on_state_change,
        inputs=state_dd,
        outputs=[district_dd, block_dd, village_dd],
    )
    district_dd.change(
        on_district_change,
        inputs=[state_dd, district_dd],
        outputs=[block_dd, village_dd],
    )
    block_dd.change(
        on_block_change,
        inputs=[state_dd, district_dd, block_dd],
        outputs=village_dd,
    )
    search_btn.click(
        rapid_search_and_render,
        inputs=[school_input, state_dd, district_dd, block_dd, village_dd],
        outputs=[rapid_results_md, best_result_md],
    )
    unlock_btn.click(
        unlock_web_search,
        inputs=unlock_pwd,
        outputs=[search_web_btn, web_status],
    )
    search_web_btn.click(
        web_search_and_render,
        inputs=[school_input, state_dd, district_dd, block_dd, village_dd],
        outputs=web_results_md,
    )
# ----------------------------------------------------
# Entry point
# ----------------------------------------------------
if __name__ == "__main__":
    # Launch the app only when this file is executed as a script, not when
    # it is imported.
    demo.launch()