gkdivya committed on
Commit
9924eab
·
verified ·
1 Parent(s): f791a9a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -87
app.py CHANGED
@@ -1,12 +1,16 @@
 
 
 
 
1
  import os
 
2
  import pandas as pd
3
  import gradio as gr
4
- from huggingface_hub import hf_hub_download
5
- import json
6
  from functools import lru_cache
 
7
 
8
  # ----------------------------------------------------
9
- # Imports
10
  # ----------------------------------------------------
11
  from searchschool import (
12
  search_candidates,
@@ -17,51 +21,61 @@ from searchschool import (
17
  # CONFIG
18
  # ----------------------------------------------------
19
  HF_SCHOOLS_DATASET = "Apf-AI4Good/Schools"
20
- MASTER_ALL_STATES_FILE = "master_all_states.csv"
21
- MASTER_STATE_COL = "School_State__c"
22
 
23
  ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", None)
24
  if not ADMIN_PASSWORD:
25
  print("⚠ ADMIN_PASSWORD not set")
26
 
27
  # ----------------------------------------------------
28
- # Load states ONCE
29
  # ----------------------------------------------------
30
  @lru_cache(maxsize=1)
31
  def load_state_hierarchy():
32
- path = hf_hub_download(
33
  repo_id=HF_SCHOOLS_DATASET,
34
  repo_type="dataset",
35
- filename="state_district_block.json",
36
  )
37
- with open(path, "r", encoding="utf-8") as f:
38
- return json.load(f)
39
 
40
- STATE_HIER = load_state_hierarchy()
41
- STATE_CHOICES = sorted(STATE_HIER.keys())
42
 
 
 
 
 
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  # ----------------------------------------------------
46
  # UI RENDER HELPERS
47
  # ----------------------------------------------------
48
- def render_header(show_score=True):
49
- with gr.Row():
50
- gr.Markdown("**School Name**")
51
- gr.Markdown("**UDISE Code**")
52
- gr.Markdown("**State**")
53
- gr.Markdown("**District**")
54
- gr.Markdown("**Block**")
55
- if show_score:
56
- gr.Markdown("**Score**")
57
-
58
-
59
  def render_results_md(df, show_score=True):
60
  if df is None or df.empty:
61
  return "❌ No results found."
62
 
63
- # Header
64
- headers = ["School Name", "UDISE Code", "State", "District", "Block"]
 
 
65
  if show_score:
66
  headers.append("Score")
67
 
@@ -75,6 +89,7 @@ def render_results_md(df, show_score=True):
75
  str(r.get("State", "")),
76
  str(r.get("District", "")),
77
  str(r.get("Block", "")),
 
78
  ]
79
  if show_score:
80
  score = r.get("Score", "")
@@ -84,77 +99,102 @@ def render_results_md(df, show_score=True):
84
 
85
  return md
86
 
 
87
  def render_best_md(df):
88
  if df is None or df.empty:
89
  return "❌ No confident best match found."
90
 
91
- headers = ["School Name", "UDISE Code", "State", "District", "Block", "Score"]
 
 
 
92
 
93
  md = "| " + " | ".join(headers) + " |\n"
94
  md += "| " + " | ".join(["---"] * len(headers)) + " |\n"
95
 
96
  r = df.iloc[0]
97
-
98
  row = [
99
  str(r.get("School_Name", "")),
100
  str(r.get("UDISE_Code", "")),
101
  str(r.get("State", "")),
102
  str(r.get("District", "")),
103
  str(r.get("Block", "")),
 
104
  f"{float(r.get('Score', 0)):.2f}",
105
  ]
106
 
107
  md += "| " + " | ".join(row) + " |\n"
108
-
109
  return md
110
 
111
-
112
-
113
-
114
  def on_state_change(state):
115
  if not state:
116
  return (
117
  gr.update(choices=[], value=None),
118
  gr.update(choices=[], value=None),
 
119
  )
120
 
121
- districts = sorted(STATE_HIER.get(state, {}).keys())
122
-
123
  return (
124
  gr.update(choices=districts, value=None),
125
  gr.update(choices=[], value=None),
 
126
  )
127
 
 
128
  def on_district_change(state, district):
129
  if not state or not district:
130
- return gr.update(choices=[], value=None)
131
-
132
- blocks = STATE_HIER.get(state, {}).get(district, [])
 
133
 
134
- return gr.update(choices=blocks, value=None)
 
 
 
 
135
 
136
 
 
 
 
137
 
 
 
138
 
139
  # ----------------------------------------------------
140
- # Wrapped search functions
141
  # ----------------------------------------------------
142
- def rapid_search_and_render(name, state, district, block):
143
- candidates, best = search_candidates(name, state, district, block)
144
-
145
- table_md = render_results_md(candidates, show_score=True)
146
- best_md = render_best_md(best)
147
-
148
- return table_md, best_md
 
 
 
 
 
149
 
150
 
151
- def web_search_and_render(name, state, district, block):
152
- results = on_search_web(name, state, district, block)
 
 
 
 
 
 
153
  return render_results_md(results, show_score=False)
154
 
155
-
156
  # ----------------------------------------------------
157
- # Unlock Web Search
158
  # ----------------------------------------------------
159
  def unlock_web_search(pwd):
160
  if pwd == ADMIN_PASSWORD:
@@ -165,24 +205,13 @@ def unlock_web_search(pwd):
165
  # Gradio App
166
  # ----------------------------------------------------
167
  with gr.Blocks(title="State School Fuzzy Matcher") as demo:
168
- gr.Markdown(
169
- """
170
- # Find School UDISE Code
171
-
172
- """
173
- )
174
-
175
- # ================= SEARCH INPUTS =================
176
- with gr.Row():
177
- state_dd = gr.Dropdown(
178
- label="State",
179
- choices=STATE_CHOICES,
180
- value=None,
181
- )
182
 
183
- with gr.Row():
184
- district_dd = gr.Dropdown(label="District", interactive=True)
185
- block_dd = gr.Dropdown(label="Block", interactive=True)
 
 
186
 
187
  school_input = gr.Textbox(
188
  label="School Name",
@@ -191,24 +220,18 @@ with gr.Blocks(title="State School Fuzzy Matcher") as demo:
191
 
192
  search_btn = gr.Button("🔍 Find Schools")
193
 
194
- # ================= RAPID RESULTS =================
195
  gr.Markdown("### 🔎 Rapid Search Results")
196
  rapid_results_md = gr.Markdown()
197
 
198
  gr.Markdown("### ⭐ Best Rapid Match")
199
  best_result_md = gr.Markdown()
200
 
201
-
202
- # ================= WEB SEARCH =================
203
  gr.Markdown("### 🌐 Web Search (Locked)")
204
- gr.Markdown(
205
- "Use only if Rapid search does not give a confident match."
206
- )
207
-
208
  unlock_pwd = gr.Textbox(
209
  label="Admin Password",
210
  type="password",
211
- placeholder="Enter password to unlock web search",
212
  )
213
  unlock_btn = gr.Button("Unlock Web Search")
214
  web_status = gr.Markdown("")
@@ -216,37 +239,45 @@ with gr.Blocks(title="State School Fuzzy Matcher") as demo:
216
  search_web_btn = gr.Button("Search Web", interactive=False)
217
  web_results_md = gr.Markdown()
218
 
219
- # ================= Wiring =================
220
  state_dd.change(
221
- fn=on_state_change,
222
  inputs=state_dd,
223
- outputs=[district_dd, block_dd],
224
  )
225
-
226
  district_dd.change(
227
- fn=on_district_change,
228
  inputs=[state_dd, district_dd],
229
- outputs=block_dd,
230
  )
231
 
 
 
 
 
 
232
 
233
  search_btn.click(
234
- fn=rapid_search_and_render,
235
- inputs=[school_input, state_dd, district_dd, block_dd],
236
- outputs=[rapid_results_md, best_result_md],
237
  )
238
 
239
  unlock_btn.click(
240
- fn=unlock_web_search,
241
  inputs=unlock_pwd,
242
  outputs=[search_web_btn, web_status],
243
  )
244
 
245
  search_web_btn.click(
246
- fn=web_search_and_render,
247
- inputs=[school_input, state_dd, district_dd, block_dd],
248
- outputs=web_results_md,
249
  )
250
 
 
 
 
251
  if __name__ == "__main__":
252
  demo.launch()
 
1
+ # ====================================================
2
+ # app.py — State School Fuzzy Matcher (Village Enabled)
3
+ # ====================================================
4
+
5
  import os
6
+ import json
7
  import pandas as pd
8
  import gradio as gr
 
 
9
  from functools import lru_cache
10
+ from huggingface_hub import hf_hub_download
11
 
12
  # ----------------------------------------------------
13
+ # External search logic
14
  # ----------------------------------------------------
15
  from searchschool import (
16
  search_candidates,
 
21
  # CONFIG
22
  # ----------------------------------------------------
23
  HF_SCHOOLS_DATASET = "Apf-AI4Good/Schools"
24
+ STATE_HIERARCHY_PARQUET = "state_district_block_villages.parquet"
 
25
 
26
  ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", None)
27
  if not ADMIN_PASSWORD:
28
  print("⚠ ADMIN_PASSWORD not set")
29
 
30
  # ----------------------------------------------------
31
+ # Load hierarchy ONCE from Hugging Face (Parquet)
32
  # ----------------------------------------------------
33
  @lru_cache(maxsize=1)
34
  def load_state_hierarchy():
35
+ parquet_path = hf_hub_download(
36
  repo_id=HF_SCHOOLS_DATASET,
37
  repo_type="dataset",
38
+ filename=STATE_HIERARCHY_PARQUET,
39
  )
 
 
40
 
41
+ df = pd.read_parquet(parquet_path)
 
42
 
43
+ required_cols = {"State", "District", "Block", "Village"}
44
+ missing = required_cols - set(df.columns)
45
+ if missing:
46
+ raise ValueError(f"Missing columns in parquet: {missing}")
47
 
48
+ hier = {}
49
+ for _, r in df.iterrows():
50
+ hier \
51
+ .setdefault(r["State"], {}) \
52
+ .setdefault(r["District"], {}) \
53
+ .setdefault(r["Block"], set()) \
54
+ .add(r["Village"])
55
+
56
+ # Convert sets → sorted lists
57
+ for s in hier:
58
+ for d in hier[s]:
59
+ for b in hier[s][d]:
60
+ hier[s][d][b] = sorted(hier[s][d][b])
61
+
62
+ return hier
63
+
64
+
65
+ STATE_HIER = load_state_hierarchy()
66
+ STATE_CHOICES = sorted(STATE_HIER.keys())
67
 
68
  # ----------------------------------------------------
69
  # UI RENDER HELPERS
70
  # ----------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
71
  def render_results_md(df, show_score=True):
72
  if df is None or df.empty:
73
  return "❌ No results found."
74
 
75
+ headers = [
76
+ "School Name", "UDISE Code",
77
+ "State", "District", "Block", "Village"
78
+ ]
79
  if show_score:
80
  headers.append("Score")
81
 
 
89
  str(r.get("State", "")),
90
  str(r.get("District", "")),
91
  str(r.get("Block", "")),
92
+ str(r.get("Village", "")),
93
  ]
94
  if show_score:
95
  score = r.get("Score", "")
 
99
 
100
  return md
101
 
102
+
103
  def render_best_md(df):
104
  if df is None or df.empty:
105
  return "❌ No confident best match found."
106
 
107
+ headers = [
108
+ "School Name", "UDISE Code",
109
+ "State", "District", "Block", "Village", "Score"
110
+ ]
111
 
112
  md = "| " + " | ".join(headers) + " |\n"
113
  md += "| " + " | ".join(["---"] * len(headers)) + " |\n"
114
 
115
  r = df.iloc[0]
 
116
  row = [
117
  str(r.get("School_Name", "")),
118
  str(r.get("UDISE_Code", "")),
119
  str(r.get("State", "")),
120
  str(r.get("District", "")),
121
  str(r.get("Block", "")),
122
+ str(r.get("Village", "")),
123
  f"{float(r.get('Score', 0)):.2f}",
124
  ]
125
 
126
  md += "| " + " | ".join(row) + " |\n"
 
127
  return md
128
 
129
+ # ----------------------------------------------------
130
+ # Cascading dropdown handlers
131
+ # ----------------------------------------------------
132
  def on_state_change(state):
133
  if not state:
134
  return (
135
  gr.update(choices=[], value=None),
136
  gr.update(choices=[], value=None),
137
+ gr.update(choices=[], value=None),
138
  )
139
 
140
+ districts = sorted(STATE_HIER[state].keys())
 
141
  return (
142
  gr.update(choices=districts, value=None),
143
  gr.update(choices=[], value=None),
144
+ gr.update(choices=[], value=None),
145
  )
146
 
147
+
148
  def on_district_change(state, district):
149
  if not state or not district:
150
+ return (
151
+ gr.update(choices=[], value=None),
152
+ gr.update(choices=[], value=None),
153
+ )
154
 
155
+ blocks = sorted(STATE_HIER[state][district].keys())
156
+ return (
157
+ gr.update(choices=blocks, value=None),
158
+ gr.update(choices=[], value=None),
159
+ )
160
 
161
 
162
def on_block_change(state, district, block):
    """Build the dropdown update after the Block selection changes.

    Args:
        state: the selected state, or a falsy value.
        district: the selected district, or a falsy value.
        block: the selected block, or a falsy value.

    Returns:
        A ``gr.update`` carrying the villages stored in ``STATE_HIER`` for
        the given path (empty when any argument is falsy or the path is
        unknown), with the value reset to ``None``.
    """
    if not (state and district and block):
        return gr.update(choices=[], value=None)

    # Chained .get: a stale or unknown path yields no villages instead of
    # raising KeyError.
    villages = STATE_HIER.get(state, {}).get(district, {}).get(block, [])
    return gr.update(choices=villages, value=None)
168
 
169
  # ----------------------------------------------------
170
+ # Search wrappers
171
  # ----------------------------------------------------
172
def rapid_search_and_render(name, state, district, block, village):
    """Run ``search_candidates`` and render both of its results as Markdown.

    All five arguments are forwarded to ``search_candidates`` by keyword.

    Returns:
        tuple: ``(candidates_markdown, best_markdown)``, produced by
        ``render_results_md`` (with scores shown) and ``render_best_md``.
    """
    query = {
        "name": name,
        "state": state,
        "district": district,
        "block": block,
        "village": village,
    }
    matches, top_match = search_candidates(**query)

    matches_md = render_results_md(matches, show_score=True)
    top_match_md = render_best_md(top_match)
    return matches_md, top_match_md
184
 
185
 
186
def web_search_and_render(name, state, district, block, village):
    """Run ``on_search_web`` and render its result as Markdown.

    All five arguments are forwarded to ``on_search_web`` by keyword.

    Returns:
        str: the output of ``render_results_md`` with the score column
        hidden.
    """
    query = {
        "name": name,
        "state": state,
        "district": district,
        "block": block,
        "village": village,
    }
    web_hits = on_search_web(**query)
    return render_results_md(web_hits, show_score=False)
195
 
 
196
  # ----------------------------------------------------
197
+ # Admin unlock
198
  # ----------------------------------------------------
199
  def unlock_web_search(pwd):
200
  if pwd == ADMIN_PASSWORD:
 
205
  # Gradio App
206
  # ----------------------------------------------------
207
  with gr.Blocks(title="State School Fuzzy Matcher") as demo:
208
+ gr.Markdown("# Find School UDISE Code")
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
+ # ---------- Inputs ----------
211
+ state_dd = gr.Dropdown(label="State", choices=STATE_CHOICES)
212
+ district_dd = gr.Dropdown(label="District", interactive=True)
213
+ block_dd = gr.Dropdown(label="Block", interactive=True)
214
+ village_dd = gr.Dropdown(label="Village", interactive=True)
215
 
216
  school_input = gr.Textbox(
217
  label="School Name",
 
220
 
221
  search_btn = gr.Button("🔍 Find Schools")
222
 
223
+ # ---------- Rapid Search ----------
224
  gr.Markdown("### 🔎 Rapid Search Results")
225
  rapid_results_md = gr.Markdown()
226
 
227
  gr.Markdown("### ⭐ Best Rapid Match")
228
  best_result_md = gr.Markdown()
229
 
230
+ # ---------- Web Search ----------
 
231
  gr.Markdown("### 🌐 Web Search (Locked)")
 
 
 
 
232
  unlock_pwd = gr.Textbox(
233
  label="Admin Password",
234
  type="password",
 
235
  )
236
  unlock_btn = gr.Button("Unlock Web Search")
237
  web_status = gr.Markdown("")
 
239
  search_web_btn = gr.Button("Search Web", interactive=False)
240
  web_results_md = gr.Markdown()
241
 
242
+ # ---------- Wiring ----------
243
  state_dd.change(
244
+ on_state_change,
245
  inputs=state_dd,
246
+ outputs=[district_dd, block_dd, village_dd],
247
  )
248
+
249
  district_dd.change(
250
+ on_district_change,
251
  inputs=[state_dd, district_dd],
252
+ outputs=[block_dd, village_dd],
253
  )
254
 
255
+ block_dd.change(
256
+ on_block_change,
257
+ inputs=[state_dd, district_dd, block_dd],
258
+ outputs=village_dd,
259
+ )
260
 
261
  search_btn.click(
262
+ rapid_search_and_render,
263
+ inputs=[school_input, state_dd, district_dd, block_dd, village_dd],
264
+ outputs=[rapid_results_md, best_result_md],
265
  )
266
 
267
  unlock_btn.click(
268
+ unlock_web_search,
269
  inputs=unlock_pwd,
270
  outputs=[search_web_btn, web_status],
271
  )
272
 
273
  search_web_btn.click(
274
+ web_search_and_render,
275
+ inputs=[school_input, state_dd, district_dd, block_dd, village_dd],
276
+ outputs=web_results_md,
277
  )
278
 
279
+ # ----------------------------------------------------
280
+ # Entry point
281
+ # ----------------------------------------------------
282
  if __name__ == "__main__":
283
  demo.launch()