gkdivya committed on
Commit
4cfcde5
·
verified ·
1 Parent(s): 9924eab

Update searchschool.py

Browse files
Files changed (1) hide show
  1. searchschool.py +55 -89
searchschool.py CHANGED
@@ -1,5 +1,7 @@
1
- # searchschool.py
2
- import os
 
 
3
  import pandas as pd
4
  from huggingface_hub import hf_hub_download
5
  from rapidfuzz import process, fuzz
@@ -11,6 +13,7 @@ from web_search import tavily_search_codes
11
  MASTER_SCHOOL_COL = "School_Name__c"
12
  MASTER_DISTRICT_COL = "School_District__c"
13
  MASTER_BLOCK_COL = "School_Block__c"
 
14
  MASTER_UDISE_COL = "School_Udise_Code__c"
15
  MASTER_STATE_COL = "School_State__c"
16
 
@@ -20,10 +23,10 @@ MASTER_ALL_STATES_FILE = "master_all_states.xlsx"
20
  DEFAULT_STATE_KEY = "ARUNACHAL PRADESH"
21
  MAX_CANDIDATES = 5
22
 
23
- # global cache (loaded once)
24
  master_df = None
25
 
26
- # normalization helper (lazy import to avoid circular deps)
27
  try:
28
  from admin_patterns import normalize_with_patterns_dynamic
29
  except Exception:
@@ -31,7 +34,7 @@ except Exception:
31
 
32
 
33
  # ====================================================
34
- # INTERNAL: load master CSV once
35
  # ====================================================
36
  def _load_master_if_needed():
37
  global master_df
@@ -44,7 +47,6 @@ def _load_master_if_needed():
44
  filename=MASTER_ALL_STATES_FILE,
45
  )
46
 
47
-
48
  master_df = pd.read_excel(local_path, dtype=str).fillna("")
49
 
50
 
@@ -54,16 +56,16 @@ def _load_master_if_needed():
54
  def on_search_web(
55
  school_name: str,
56
  state_name: str,
57
- district: str = None,
58
- block: str = None
 
59
  ):
60
  """
61
  1. Tavily search → list of UDISE codes
62
- 2. Lookup those UDISE codes in master_all_states.csv
63
  3. Return standardized DataFrame
64
  """
65
 
66
- # Step 1: Tavily search
67
  udise_list = tavily_search_codes(
68
  school_name=school_name,
69
  state_name=state_name,
@@ -74,16 +76,24 @@ def on_search_web(
74
 
75
  if not udise_list:
76
  return pd.DataFrame(
77
- columns=["School_Name", "State", "District", "Block", "UDISE_Code"]
 
 
 
 
78
  )
79
 
80
- # Step 2: lookup
81
- rows = get_school_rows_by_udise(state_name, udise_list)
 
82
 
83
- # Step 3: to DataFrame
84
  df = pd.DataFrame(rows)
85
 
86
- expected = ["School_Name", "State", "District", "Block", "UDISE_Code"]
 
 
 
 
87
  for col in expected:
88
  if col not in df.columns:
89
  df[col] = None
@@ -91,32 +101,43 @@ def on_search_web(
91
  return df[expected]
92
 
93
 
94
- def get_school_rows_by_udise(state_name: str, udise_codes: list[str]):
95
- """
96
- UDISE → school rows lookup from master_all_states.csv
97
- """
 
 
 
98
  if not udise_codes:
99
  return []
100
 
101
  _load_master_if_needed()
102
 
 
103
  udise_codes = {str(u) for u in udise_codes}
104
 
105
- df = master_df
106
- matched = df[df[MASTER_UDISE_COL].isin(udise_codes)]
107
 
108
  if state_name:
109
- matched = matched[
110
- matched[MASTER_STATE_COL].str.upper() == state_name.upper()
111
- ]
 
 
 
 
 
 
 
112
 
113
  rows = []
114
- for _, r in matched.iterrows():
115
  rows.append({
116
  "School_Name": r.get(MASTER_SCHOOL_COL, ""),
117
  "State": r.get(MASTER_STATE_COL, ""),
118
  "District": r.get(MASTER_DISTRICT_COL, ""),
119
  "Block": r.get(MASTER_BLOCK_COL, ""),
 
120
  "UDISE_Code": r.get(MASTER_UDISE_COL, ""),
121
  })
122
 
@@ -124,72 +145,15 @@ def get_school_rows_by_udise(state_name: str, udise_codes: list[str]):
124
 
125
 
126
  # ====================================================
127
- # MASTER LOAD FOR UI (STATE → DISTRICT → BLOCK)
128
- # ====================================================
129
- def load_master_for_state(state_key: str | None):
130
- """
131
- Load master_all_states.csv once.
132
- Filter districts by selected state.
133
- """
134
- import gradio as gr
135
-
136
- _load_master_if_needed()
137
-
138
- df = master_df
139
-
140
- if state_key:
141
- df = df[df[MASTER_STATE_COL].str.upper() == state_key.upper()]
142
-
143
- if MASTER_DISTRICT_COL in df.columns:
144
- districts = sorted(df[MASTER_DISTRICT_COL].unique().tolist())
145
- districts = ["All"] + districts
146
- else:
147
- districts = []
148
-
149
- blocks = ["All"]
150
-
151
- return (
152
- gr.Dropdown(choices=districts, value="All" if districts else None),
153
- gr.Dropdown(choices=blocks, value="All"),
154
- )
155
-
156
-
157
- def update_blocks(district: str | None):
158
- """
159
- Update block dropdown when district changes
160
- """
161
- import gradio as gr
162
-
163
- _load_master_if_needed()
164
-
165
- df = master_df
166
-
167
- if district and district != "All":
168
- df = df[df[MASTER_DISTRICT_COL] == district]
169
-
170
- if MASTER_BLOCK_COL in df.columns:
171
- blocks = sorted(df[MASTER_BLOCK_COL].unique().tolist())
172
- blocks = ["All"] + blocks if blocks else ["All"]
173
- else:
174
- blocks = ["All"]
175
-
176
- return gr.Dropdown(choices=blocks, value="All")
177
-
178
-
179
- # ====================================================
180
- # RAPIDFUZZ SEARCH
181
  # ====================================================
182
  def search_candidates(
183
  query_name: str,
184
  state_key: str | None,
185
  district: str | None,
186
  block: str | None,
 
187
  ):
188
- """
189
- Given school name + filters, return:
190
- - candidates table
191
- - best candidate table
192
- """
193
  global normalize_with_patterns_dynamic
194
 
195
  if normalize_with_patterns_dynamic is None:
@@ -202,18 +166,19 @@ def search_candidates(
202
 
203
  df = master_df
204
 
205
- # Filter by state
206
  if state_key:
207
  df = df[df[MASTER_STATE_COL].str.upper() == state_key.upper()]
208
 
209
- # Filter by district
210
- if district and district != "All":
211
  df = df[df[MASTER_DISTRICT_COL] == district]
212
 
213
- # Filter by block
214
- if block and block != "All":
215
  df = df[df[MASTER_BLOCK_COL] == block]
216
 
 
 
 
217
  if df.empty:
218
  return pd.DataFrame(), pd.DataFrame()
219
 
@@ -239,6 +204,7 @@ def search_candidates(
239
  "State": r.get(MASTER_STATE_COL, ""),
240
  "District": r.get(MASTER_DISTRICT_COL, ""),
241
  "Block": r.get(MASTER_BLOCK_COL, ""),
 
242
  "UDISE_Code": r.get(MASTER_UDISE_COL, ""),
243
  "Score": score,
244
  })
 
1
+ # ====================================================
2
+ # searchschool.py — Village Enabled
3
+ # ====================================================
4
+
5
  import pandas as pd
6
  from huggingface_hub import hf_hub_download
7
  from rapidfuzz import process, fuzz
 
13
  MASTER_SCHOOL_COL = "School_Name__c"
14
  MASTER_DISTRICT_COL = "School_District__c"
15
  MASTER_BLOCK_COL = "School_Block__c"
16
+ MASTER_VILLAGE_COL = "School_Village__c"
17
  MASTER_UDISE_COL = "School_Udise_Code__c"
18
  MASTER_STATE_COL = "School_State__c"
19
 
 
23
  DEFAULT_STATE_KEY = "ARUNACHAL PRADESH"
24
  MAX_CANDIDATES = 5
25
 
26
+ # global cache
27
  master_df = None
28
 
29
+ # normalization helper
30
  try:
31
  from admin_patterns import normalize_with_patterns_dynamic
32
  except Exception:
 
34
 
35
 
36
  # ====================================================
37
+ # INTERNAL: load master once
38
  # ====================================================
39
  def _load_master_if_needed():
40
  global master_df
 
47
  filename=MASTER_ALL_STATES_FILE,
48
  )
49
 
 
50
  master_df = pd.read_excel(local_path, dtype=str).fillna("")
51
 
52
 
 
56
  def on_search_web(
57
  school_name: str,
58
  state_name: str,
59
+ district: str | None = None,
60
+ block: str | None = None,
61
+ village: str | None = None,
62
  ):
63
  """
64
  1. Tavily search → list of UDISE codes
65
+ 2. Lookup those UDISE codes in master
66
  3. Return standardized DataFrame
67
  """
68
 
 
69
  udise_list = tavily_search_codes(
70
  school_name=school_name,
71
  state_name=state_name,
 
76
 
77
  if not udise_list:
78
  return pd.DataFrame(
79
+ columns=[
80
+ "School_Name", "State",
81
+ "District", "Block", "Village",
82
+ "UDISE_Code"
83
+ ]
84
  )
85
 
86
+ rows = get_school_rows_by_udise(
87
+ state_name, udise_list, district, block, village
88
+ )
89
 
 
90
  df = pd.DataFrame(rows)
91
 
92
+ expected = [
93
+ "School_Name", "State",
94
+ "District", "Block", "Village",
95
+ "UDISE_Code"
96
+ ]
97
  for col in expected:
98
  if col not in df.columns:
99
  df[col] = None
 
101
  return df[expected]
102
 
103
 
104
+ def get_school_rows_by_udise(
105
+ state_name: str,
106
+ udise_codes: list[str],
107
+ district: str | None = None,
108
+ block: str | None = None,
109
+ village: str | None = None,
110
+ ):
111
  if not udise_codes:
112
  return []
113
 
114
  _load_master_if_needed()
115
 
116
+ df = master_df
117
  udise_codes = {str(u) for u in udise_codes}
118
 
119
+ df = df[df[MASTER_UDISE_COL].isin(udise_codes)]
 
120
 
121
  if state_name:
122
+ df = df[df[MASTER_STATE_COL].str.upper() == state_name.upper()]
123
+
124
+ if district:
125
+ df = df[df[MASTER_DISTRICT_COL] == district]
126
+
127
+ if block:
128
+ df = df[df[MASTER_BLOCK_COL] == block]
129
+
130
+ if village and MASTER_VILLAGE_COL in df.columns:
131
+ df = df[df[MASTER_VILLAGE_COL] == village]
132
 
133
  rows = []
134
+ for _, r in df.iterrows():
135
  rows.append({
136
  "School_Name": r.get(MASTER_SCHOOL_COL, ""),
137
  "State": r.get(MASTER_STATE_COL, ""),
138
  "District": r.get(MASTER_DISTRICT_COL, ""),
139
  "Block": r.get(MASTER_BLOCK_COL, ""),
140
+ "Village": r.get(MASTER_VILLAGE_COL, ""),
141
  "UDISE_Code": r.get(MASTER_UDISE_COL, ""),
142
  })
143
 
 
145
 
146
 
147
  # ====================================================
148
+ # RAPIDFUZZ SEARCH (Village-aware)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  # ====================================================
150
  def search_candidates(
151
  query_name: str,
152
  state_key: str | None,
153
  district: str | None,
154
  block: str | None,
155
+ village: str | None = None,
156
  ):
 
 
 
 
 
157
  global normalize_with_patterns_dynamic
158
 
159
  if normalize_with_patterns_dynamic is None:
 
166
 
167
  df = master_df
168
 
169
+ # -------- Filters --------
170
  if state_key:
171
  df = df[df[MASTER_STATE_COL].str.upper() == state_key.upper()]
172
 
173
+ if district:
 
174
  df = df[df[MASTER_DISTRICT_COL] == district]
175
 
176
+ if block:
 
177
  df = df[df[MASTER_BLOCK_COL] == block]
178
 
179
+ if village and MASTER_VILLAGE_COL in df.columns:
180
+ df = df[df[MASTER_VILLAGE_COL] == village]
181
+
182
  if df.empty:
183
  return pd.DataFrame(), pd.DataFrame()
184
 
 
204
  "State": r.get(MASTER_STATE_COL, ""),
205
  "District": r.get(MASTER_DISTRICT_COL, ""),
206
  "Block": r.get(MASTER_BLOCK_COL, ""),
207
+ "Village": r.get(MASTER_VILLAGE_COL, ""),
208
  "UDISE_Code": r.get(MASTER_UDISE_COL, ""),
209
  "Score": score,
210
  })