gkdivya committed on
Commit
87ecf4c
·
verified ·
1 Parent(s): c78ab8d

Create searchschool.py

Browse files
Files changed (1) hide show
  1. searchschool.py +180 -0
searchschool.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # searchschool.py
2
+ import os
3
+ import pandas as pd
4
+ from huggingface_hub import hf_hub_download
5
+ from rapidfuzz import process, fuzz
6
+
7
# ====================================================
# CONFIG: columns, states, HF dataset
# ====================================================
# Column names expected in each state's master CSV.
MASTER_SCHOOL_COL = "School_Name__c"
MASTER_DISTRICT_COL = "School_District__c"
MASTER_BLOCK_COL = "School_Block__c"
MASTER_UDISE_COL = "School_Udise_Code__c"
MASTER_STATE_COL = "School_State__c"

# Hugging Face dataset repo that holds the per-state CSV files.
HF_SCHOOLS_DATASET = "Apf-AI4Good/Schools"

# Map state keys (upper-case, stripped) to CSV filenames inside that dataset.
STATE_HF_FILES = {
    "ARUNACHAL PRADESH": "Arunachal Pradesh.csv",
    # add more states as you upload them
}

# State key used for pattern normalization when the caller supplies none.
DEFAULT_STATE_KEY = "ARUNACHAL PRADESH"
# Maximum number of fuzzy-match candidates returned by a search.
MAX_CANDIDATES = 5

# Global cache: the master DataFrame of the most recently loaded state,
# or None when no state is loaded.
master_df = None
29
+
30
# normalize_with_patterns_dynamic lives in admin_patterns. Importing it here
# can fail when the modules import each other (circular import), so a failed
# import is tolerated: the name is left as None and search_candidates retries
# the import lazily on first use.
try:
    from admin_patterns import normalize_with_patterns_dynamic
except ImportError:
    # Only import failures are deferred; any other error raised while
    # admin_patterns executes is a real bug and should surface immediately.
    normalize_with_patterns_dynamic = None
37
+
38
+
39
def load_master_for_state(state_key: str | None):
    """Load a state's master school CSV and build the District/Block dropdowns.

    The CSV is downloaded from the Hugging Face dataset repo named by
    ``HF_SCHOOLS_DATASET`` and stored in the module-level ``master_df`` cache
    (all columns read as strings, missing values replaced by ``""``).

    Args:
        state_key: State name; matched case-insensitively (after stripping)
            against the keys of ``STATE_HF_FILES``. ``None`` or an empty
            string clears the cache.

    Returns:
        A ``(district_dropdown, block_dropdown)`` tuple of ``gr.Dropdown``
        objects. Both are empty when the state is missing or unknown;
        otherwise the district dropdown lists ``"All"`` plus every non-blank
        district and the block dropdown starts with just ``"All"``.
    """
    global master_df

    # Import before any use of ``gr``: a function-level import makes ``gr`` a
    # local name for the whole body, so referencing it in the early returns
    # ahead of the import statement would raise UnboundLocalError.
    import gradio as gr

    def _empty_dropdowns():
        # Fresh component instances for the "nothing loaded" case.
        return (
            gr.Dropdown(choices=[], value=None),
            gr.Dropdown(choices=[], value=None),
        )

    state_key_norm = state_key.upper().strip() if state_key else ""
    if state_key_norm not in STATE_HF_FILES:
        master_df = None
        return _empty_dropdowns()

    # Download the CSV file from the dataset repo.
    local_path = hf_hub_download(
        repo_id=HF_SCHOOLS_DATASET,
        repo_type="dataset",
        filename=STATE_HF_FILES[state_key_norm],
    )

    master_df = pd.read_csv(local_path, dtype=str).fillna("")

    # District choices. NaN was already replaced by "" above, so blanks have
    # to be filtered explicitly (dropna() would not remove them).
    if MASTER_DISTRICT_COL in master_df.columns:
        districts = ["All"] + sorted(
            name
            for name in master_df[MASTER_DISTRICT_COL].unique()
            if name.strip()
        )
    else:
        districts = []

    # Blocks start as "All" only; update_blocks narrows them once a district
    # is picked.
    blocks = ["All"] if MASTER_BLOCK_COL in master_df.columns else []

    return (
        gr.Dropdown(choices=districts, value="All" if districts else None),
        gr.Dropdown(choices=blocks, value="All" if blocks else None),
    )
81
+
82
+
83
def update_blocks(district: str | None):
    """Rebuild the Block dropdown after the District selection changes.

    Args:
        district: Selected district, or ``None`` / ``"All"`` for no filter.

    Returns:
        A ``gr.Dropdown`` whose choices are ``"All"`` followed by the sorted,
        non-blank blocks of the selected district (or of the whole loaded
        state when no district filter applies). The selected value is always
        ``"All"``. When no master data is loaded, or it has no block column,
        the only choice is ``"All"``.
    """
    import gradio as gr

    if master_df is None or MASTER_BLOCK_COL not in master_df.columns:
        return gr.Dropdown(choices=["All"], value="All")

    df = master_df
    if district and district != "All" and MASTER_DISTRICT_COL in df.columns:
        df = df[df[MASTER_DISTRICT_COL] == district]

    # The master frame is loaded with NaN replaced by "", so blank block names
    # must be filtered explicitly (dropna() would not remove them).
    blocks = sorted(
        name for name in df[MASTER_BLOCK_COL].unique() if str(name).strip()
    )
    return gr.Dropdown(choices=["All"] + blocks, value="All")
104
+
105
+
106
def search_candidates(query_name: str, state_key: str | None, district: str | None, block: str | None):
    """Fuzzy-match a school name against the currently loaded master data.

    Args:
        query_name: School name typed by the user; surrounding whitespace is
            ignored.
        state_key: State passed to the pattern normalizer; falls back to
            ``DEFAULT_STATE_KEY`` when empty.
        district: District filter, or ``None`` / ``"All"`` for no filter.
        block: Block filter, or ``None`` / ``"All"`` for no filter.

    Returns:
        A ``(candidates_df, best_df)`` tuple of DataFrames. ``candidates_df``
        holds up to ``MAX_CANDIDATES`` rows with the columns ``School_Name``,
        ``State``, ``District``, ``Block``, ``UDISE_Code`` and ``Score``;
        ``best_df`` is its first row. Both are empty DataFrames when no
        master data is loaded, the query is blank, the school-name column is
        missing, or the filters leave no rows.
    """
    global normalize_with_patterns_dynamic

    # Lazy import: the module-level import may have been skipped (e.g. because
    # of a circular import), so resolve the normalizer on first use.
    if normalize_with_patterns_dynamic is None:
        import admin_patterns

        normalize_with_patterns_dynamic = admin_patterns.normalize_with_patterns_dynamic

    empty_result = (pd.DataFrame(), pd.DataFrame())

    if master_df is None:
        return empty_result

    query_name = (query_name or "").strip()
    if not query_name:
        return empty_result

    df = master_df

    # Without the school-name column there is nothing to match against.
    if MASTER_SCHOOL_COL not in df.columns:
        return empty_result

    # Narrow the search space by the optional district / block filters.
    for selected, column in ((district, MASTER_DISTRICT_COL), (block, MASTER_BLOCK_COL)):
        if selected and selected != "All" and column in df.columns:
            df = df[df[column] == selected]

    if df.empty:
        return empty_result

    state_for_patterns = (state_key or DEFAULT_STATE_KEY).upper().strip()

    # Passing the Series keeps the DataFrame index as the match key, so each
    # hit can be mapped back to its full row below.
    choices = df[MASTER_SCHOOL_COL].astype(str)

    candidates_raw = process.extract(
        query_name,
        choices,
        scorer=fuzz.token_set_ratio,
        processor=lambda s: normalize_with_patterns_dynamic(s, state_for_patterns),
        limit=MAX_CANDIDATES,
    )  # list of (choice, score, key)

    if not candidates_raw:
        return empty_result

    rows = []
    for _choice_name, score, key in candidates_raw:
        try:
            row = df.loc[key]
        except KeyError:
            # Key no longer present in the filtered frame; skip this hit.
            continue

        rows.append({
            "School_Name": row.get(MASTER_SCHOOL_COL, ""),
            # Fall back to the requested state when the CSV has no state column.
            "State": row.get(MASTER_STATE_COL, state_for_patterns),
            "District": row.get(MASTER_DISTRICT_COL, ""),
            "Block": row.get(MASTER_BLOCK_COL, ""),
            "UDISE_Code": row.get(MASTER_UDISE_COL, ""),
            "Score": score,
        })

    candidates_df = pd.DataFrame(rows)
    best_df = candidates_df.head(1).copy()
    return candidates_df, best_df