gkdivya committed on
Commit
fcf55ea
·
verified ·
1 Parent(s): da56ca5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +106 -76
app.py CHANGED
@@ -4,19 +4,20 @@
4
  import gradio as gr
5
  import requests
6
  import pandas as pd
7
- import json
 
8
  from tavily import TavilyClient
9
 
 
10
  KYS_SAMPLE = "https://kys.udiseplus.gov.in/webapp/api/search-schools?searchType=3&searchParam={udise}"
11
  STATES = [
12
  "Arunachal_pradesh",
13
  "Assam",
14
  "Bihar",
15
  "Chhattisgarh",
16
- "Delhi",
17
  "Jharkhand",
18
  "Karnataka",
19
- "Madhya pradesh",
20
  "Manipur",
21
  "Meghalaya",
22
  "Mizoram",
@@ -25,16 +26,14 @@ STATES = [
25
  "Puducherry",
26
  "Rajasthan",
27
  "Sikkim",
28
- "Tamil nadu",
29
  "Telangana",
30
  "Tripura",
31
- "Uttar pradesh",
32
- "Uttarakhand"
33
  ]
34
 
35
-
36
-
37
- def call_kys_by_udise(udise_code):
38
  url = KYS_SAMPLE.format(udise=udise_code)
39
  try:
40
  resp = requests.get(url, timeout=10)
@@ -45,7 +44,7 @@ def call_kys_by_udise(udise_code):
45
  return {"ok": False, "error": str(e), "url": url}
46
 
47
 
48
- def call_search_sdk(api_key, payload_text):
49
  try:
50
  client = TavilyClient(api_key)
51
  resp = client.search(query=payload_text)
@@ -54,31 +53,58 @@ def call_search_sdk(api_key, payload_text):
54
  return {"ok": False, "error": str(e)}
55
 
56
 
57
def extract_udise_candidates_from_search(search_json):
    """Collect numeric tokens that could be UDISE codes from a search payload.

    Every string nested anywhere inside ``search_json`` (through dicts and
    lists) is split on whitespace after hyphens are turned into spaces.
    Tokens made only of digits and between 6 and 14 characters long are kept.

    Returns the distinct tokens as a sorted list of strings.
    """
    matches = set()
    # Explicit worklist instead of a nested recursive helper.
    pending = [search_json]
    while pending:
        node = pending.pop()
        if isinstance(node, dict):
            pending.extend(node.values())
        elif isinstance(node, list):
            pending.extend(node)
        elif isinstance(node, str):
            for token in node.replace('-', ' ').split():
                if token.isdigit() and 6 <= len(token) <= 14:
                    matches.add(token)
    return sorted(matches)
74
-
75
-
76
- def json_to_table(obj):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  try:
78
  if isinstance(obj, list):
79
  return pd.json_normalize(obj)
80
  if isinstance(obj, dict):
81
- for k in ("results", "data", "hits", "items"):
82
  if k in obj and isinstance(obj[k], list):
83
  return pd.json_normalize(obj[k])
84
  return pd.json_normalize([obj])
@@ -87,40 +113,36 @@ def json_to_table(obj):
87
  return pd.DataFrame()
88
 
89
 
90
def to_table_from_kys(kys_json):
    """
    Build a simplified pandas DataFrame from a KYS JSON wrapper.

    The list of school records is looked up, in order, at
    ``kys_json["data"]["content"]``, ``kys_json["data"]["data"]["content"]``
    and ``kys_json["content"]``. Only a fixed set of fields is kept per record.
    An empty DataFrame is returned when no non-empty list is found or when
    any error occurs (the error is printed).
    """
    # (output column, key in the KYS record)
    column_keys = (
        ("School Name", "schoolName"),
        ("School ID", "schoolId"),
        ("Pincode", "pincode"),
        ("State", "stateName"),
        ("District", "districtName"),
        ("Management Type", "schMgmtType"),
    )
    try:
        records = None
        if isinstance(kys_json, dict):
            wrapper = kys_json.get("data")
            nested = wrapper.get("data") if isinstance(wrapper, dict) else None
            if isinstance(wrapper, dict) and isinstance(wrapper.get("content"), list):
                records = wrapper.get("content")
            elif isinstance(nested, dict) and isinstance(nested.get("content"), list):
                records = nested.get("content")
            elif isinstance(kys_json.get("content"), list):
                records = kys_json.get("content")
        if not records:
            return pd.DataFrame()
        return pd.DataFrame(
            [{label: rec.get(key) for label, key in column_keys} for rec in records]
        )
    except Exception as e:
        print("to_table_from_kys error:", e)
        return pd.DataFrame()
121
 
122
 
123
- def search_workflow(school_name, state_name, search_key, use_search, use_kys):
 
124
  out = {"kys": None, "search": None, "suggestions": []}
125
  payload_text = f"{school_name or ''} {state_name or ''} UDISE code".strip()
126
 
@@ -140,21 +162,23 @@ def search_workflow(school_name, state_name, search_key, use_search, use_kys):
140
  return out
141
 
142
 
 
143
  with gr.Blocks() as demo:
144
- gr.Markdown(
145
- """
146
- # Search + KYS Lookup (Hugging Face Space)
147
-
148
- This version uses the Search SDK internally. Provide your API key in the textbox.
149
-
150
- Enter a school name (or UDISE code) and select the state; the app calls Search SDK
151
- with the combined query `<school> <state> UDISE code`, then optionally calls KYS.
152
- """
153
- )
154
 
155
  with gr.Row():
156
- inp = gr.Textbox(label="School name or UDISE code", placeholder="e.g. GOVT SEC SCHOOL DARLONG or 12345678901", lines=1)
157
- state_dropdown = gr.Dropdown(choices=STATES, label="State", value=STATES[0] if STATES else "", interactive=True, allow_custom_value=True)
 
 
 
 
 
 
 
 
 
 
158
 
159
  search_key = gr.Textbox(label="Search API Key (required)", placeholder="api-key...", lines=1)
160
  save_key_toggle = gr.Checkbox(value=False, label="Save key in session (keeps key between interactions)")
@@ -166,15 +190,17 @@ with the combined query `<school> <state> UDISE code`, then optionally calls KYS
166
 
167
  run = gr.Button("Search")
168
 
169
- # By default hide raw JSON outputs; users can toggle visibility with `show_raw_checkbox`
170
  show_raw_checkbox = gr.Checkbox(value=False, label="Show raw JSON outputs")
171
-
172
  output_json = gr.JSON(label="Raw Search Output (JSON)", visible=False)
173
  search_table = gr.DataFrame(headers=None, label="Search results (table)")
174
 
175
  gr.Markdown("### UDISE candidates found in Search results")
176
  suggestions_dropdown = gr.Dropdown(choices=[], label="UDISE candidates (from Search)")
177
- udise_input = gr.Textbox(label="UDISE to lookup (editable)", placeholder="Pick a candidate or type a UDISE code...", lines=1)
 
 
 
 
178
  lookup_btn = gr.Button("Lookup UDISE (Call KYS)")
179
 
180
  kys_output_json = gr.JSON(label="KYS Raw Output", visible=False)
@@ -194,10 +220,24 @@ with the combined query `<school> <state> UDISE code`, then optionally calls KYS
194
  if save_key:
195
  new_saved_key = key or saved_key
196
  textbox_value = new_saved_key
197
- # return raw JSON objects (even if hidden) and the table + suggestions
198
- return res.get("search"), tbl, suggestions, new_saved_key, textbox_value, res.get("kys")
199
 
200
- run.click(on_run, inputs=[inp, state_dropdown, search_key, save_key_toggle, use_search, use_kys, saved_key_state], outputs=[output_json, search_table, suggestions_dropdown, saved_key_state, search_key, kys_output_json])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
 
202
  def on_select_suggestion(choice):
203
  return choice or ""
@@ -220,21 +260,11 @@ with the combined query `<school> <state> UDISE code`, then optionally calls KYS
220
 
221
  clear_key_btn.click(on_clear_key, inputs=[saved_key_state], outputs=[saved_key_state, search_key])
222
 
223
- # Toggle visibility handler for raw JSON outputs
224
  def toggle_raw(visible: bool):
225
  return gr.update(visible=visible), gr.update(visible=visible)
226
 
227
  show_raw_checkbox.change(toggle_raw, inputs=[show_raw_checkbox], outputs=[output_json, kys_output_json])
228
 
229
- gr.Markdown(
230
- """
231
- ---
232
- **Notes:**
233
- - The 'Save key in session' toggle keeps the key active for the current session.
234
- - The key is stored only in-memory (not written to disk) and is not logged.
235
- - Use 'Clear saved key' to remove it from the session.
236
- """
237
- )
238
 
239
  if __name__ == "__main__":
240
  demo.launch()
 
4
  import gradio as gr
5
  import requests
6
  import pandas as pd
7
+ import re
8
+ from typing import Dict, Any, Tuple, List
9
  from tavily import TavilyClient
10
 
11
+ # ---------- constants ----------
12
  KYS_SAMPLE = "https://kys.udiseplus.gov.in/webapp/api/search-schools?searchType=3&searchParam={udise}"
13
  STATES = [
14
  "Arunachal_pradesh",
15
  "Assam",
16
  "Bihar",
17
  "Chhattisgarh",
 
18
  "Jharkhand",
19
  "Karnataka",
20
+ "Madhya_pradesh",
21
  "Manipur",
22
  "Meghalaya",
23
  "Mizoram",
 
26
  "Puducherry",
27
  "Rajasthan",
28
  "Sikkim",
 
29
  "Telangana",
30
  "Tripura",
31
+ "Uttar_pradesh",
32
+ "Uttarakhand",
33
  ]
34
 
35
+ # ---------- network helpers ----------
36
+ def call_kys_by_udise(udise_code: str) -> Dict[str, Any]:
 
37
  url = KYS_SAMPLE.format(udise=udise_code)
38
  try:
39
  resp = requests.get(url, timeout=10)
 
44
  return {"ok": False, "error": str(e), "url": url}
45
 
46
 
47
+ def call_search_sdk(api_key: str, payload_text: str) -> Dict[str, Any]:
48
  try:
49
  client = TavilyClient(api_key)
50
  resp = client.search(query=payload_text)
 
53
  return {"ok": False, "error": str(e)}
54
 
55
 
56
+ # ---------- UDISE extraction ----------
57
# Number explicitly labelled as a UDISE code. Case-insensitive with an optional
# "code" word so that phrasings such as "UDISE code: 123..." (the wording the
# app itself puts in the search query) are recognised.
UDISE_RE = re.compile(r"\bUDISE(?:\s+Code)?[:\s]*([0-9]{6,12})\b", re.IGNORECASE)
# Any standalone run of 5-12 digits; used as a fallback when no label is present.
GENERIC_NUM_RE = re.compile(r"\b([0-9]{5,12})\b")


def normalize_candidate(c: str) -> str:
    """Return candidate *c* with surrounding whitespace removed."""
    return c.strip()


def extract_udise_code_from_tavily_results(tavily_json: Dict[str, Any]) -> Tuple[List[str], str]:
    """Extract candidate UDISE codes from a wrapped search response.

    Results are read from ``tavily_json["data"]["results"]``. For each result
    the ``content`` and ``title`` fields are scanned first for labelled UDISE
    numbers and then for any generic digit run; finally the ``url`` field is
    scanned for generic digit runs.

    Malformed input is tolerated: a missing or non-dict ``data``, a non-list
    ``results``, non-dict result entries and non-string URLs are skipped
    instead of raising.

    Returns:
        ``(candidates, best)`` where ``candidates`` holds the distinct matches
        in first-seen order and ``best`` is the first candidate with at least
        10 digits, else the first candidate, else ``""``.
    """
    data = tavily_json.get("data") if isinstance(tavily_json, dict) else None
    results = data.get("results") if isinstance(data, dict) else None
    if not isinstance(results, (list, tuple)):
        results = []

    candidates: List[str] = []
    for r in results:
        if not isinstance(r, dict):
            continue
        # A missing/None field contributes nothing (rather than the text "None").
        parts = (r.get(k) for k in ("content", "title"))
        content = " ".join("" if p is None else str(p) for p in parts)
        # Labelled matches go first so they rank ahead of unrelated numbers.
        for pattern in (UDISE_RE, GENERIC_NUM_RE):
            candidates.extend(
                normalize_candidate(m.group(1)) for m in pattern.finditer(content)
            )
        url = r.get("url")
        if isinstance(url, str):
            candidates.extend(
                normalize_candidate(m.group(1)) for m in GENERIC_NUM_RE.finditer(url)
            )

    # Dedupe while preserving first-seen order (dicts keep insertion order).
    unique = [c for c in dict.fromkeys(candidates) if c]

    # Heuristic for the best guess: prefer the first long (>= 10 digit) number.
    best = next((c for c in unique if len(c) >= 10), "")
    if not best and unique:
        best = unique[0]
    return unique, best


def extract_udise_candidates_from_search(search_data: Dict[str, Any]) -> List[str]:
    """Return only the ordered candidate list for *search_data*."""
    candidates, _ = extract_udise_code_from_tavily_results(search_data)
    return candidates
99
+
100
+
101
+ # ---------- helpers: JSON -> table ----------
102
+ def json_to_table(obj: Any) -> pd.DataFrame:
103
  try:
104
  if isinstance(obj, list):
105
  return pd.json_normalize(obj)
106
  if isinstance(obj, dict):
107
+ for k in ("results", "data", "hits", "items", "content"):
108
  if k in obj and isinstance(obj[k], list):
109
  return pd.json_normalize(obj[k])
110
  return pd.json_normalize([obj])
 
113
  return pd.DataFrame()
114
 
115
 
116
def to_table_from_kys(kys_json: Dict[str, Any]) -> pd.DataFrame:
    """Turn a KYS JSON payload into a compact table of selected school fields.

    The record list is taken from ``kys_json["data"]["content"]`` when present,
    otherwise from ``kys_json["content"]``. An empty DataFrame is returned when
    no non-empty list is found or when an error occurs (the error is printed).
    """
    # output column -> key in the KYS record
    field_map = {
        "School Name": "schoolName",
        "School ID": "schoolId",
        "Pincode": "pincode",
        "State": "stateName",
        "District": "districtName",
        "Management Type": "schMgmtType",
    }
    try:
        if not isinstance(kys_json, dict):
            return pd.DataFrame()

        wrapped = kys_json.get("data")
        if isinstance(wrapped, dict) and isinstance(wrapped.get("content"), list):
            schools = wrapped["content"]
        elif isinstance(kys_json.get("content"), list):
            schools = kys_json["content"]
        else:
            schools = None

        if not schools:
            return pd.DataFrame()

        table_rows = []
        for school in schools:
            table_rows.append(
                {column: school.get(source) for column, source in field_map.items()}
            )
        return pd.DataFrame(table_rows)
    except Exception as e:
        print("to_table_from_kys error:", e)
        return pd.DataFrame()
142
 
143
 
144
+ # ---------- main workflow ----------
145
+ def search_workflow(school_name: str, state_name: str, search_key: str, use_search: bool, use_kys: bool):
146
  out = {"kys": None, "search": None, "suggestions": []}
147
  payload_text = f"{school_name or ''} {state_name or ''} UDISE code".strip()
148
 
 
162
  return out
163
 
164
 
165
+ # ---------- Gradio UI ----------
166
  with gr.Blocks() as demo:
167
+ gr.Markdown("# Search + KYS Lookup (Hugging Face Space)")
 
 
 
 
 
 
 
 
 
168
 
169
  with gr.Row():
170
+ inp = gr.Textbox(
171
+ label="School name or UDISE code",
172
+ placeholder="e.g. GOVT SEC SCHOOL DARLONG or 12345678901",
173
+ lines=1,
174
+ )
175
+ state_dropdown = gr.Dropdown(
176
+ choices=STATES,
177
+ label="State",
178
+ value=STATES[0] if STATES else "",
179
+ interactive=True,
180
+ allow_custom_value=True,
181
+ )
182
 
183
  search_key = gr.Textbox(label="Search API Key (required)", placeholder="api-key...", lines=1)
184
  save_key_toggle = gr.Checkbox(value=False, label="Save key in session (keeps key between interactions)")
 
190
 
191
  run = gr.Button("Search")
192
 
 
193
  show_raw_checkbox = gr.Checkbox(value=False, label="Show raw JSON outputs")
 
194
  output_json = gr.JSON(label="Raw Search Output (JSON)", visible=False)
195
  search_table = gr.DataFrame(headers=None, label="Search results (table)")
196
 
197
  gr.Markdown("### UDISE candidates found in Search results")
198
  suggestions_dropdown = gr.Dropdown(choices=[], label="UDISE candidates (from Search)")
199
+ udise_input = gr.Textbox(
200
+ label="UDISE to lookup (editable)",
201
+ placeholder="Pick a candidate or type a UDISE code...",
202
+ lines=1,
203
+ )
204
  lookup_btn = gr.Button("Lookup UDISE (Call KYS)")
205
 
206
  kys_output_json = gr.JSON(label="KYS Raw Output", visible=False)
 
220
  if save_key:
221
  new_saved_key = key or saved_key
222
  textbox_value = new_saved_key
 
 
223
 
224
+ suggestions_update = gr.Dropdown.update(
225
+ choices=suggestions, value=suggestions[0] if suggestions else None
226
+ )
227
+ return (
228
+ res.get("search"),
229
+ tbl,
230
+ suggestions_update,
231
+ new_saved_key,
232
+ textbox_value,
233
+ res.get("kys"),
234
+ )
235
+
236
+ run.click(
237
+ on_run,
238
+ inputs=[inp, state_dropdown, search_key, save_key_toggle, use_search, use_kys, saved_key_state],
239
+ outputs=[output_json, search_table, suggestions_dropdown, saved_key_state, search_key, kys_output_json],
240
+ )
241
 
242
  def on_select_suggestion(choice):
243
  return choice or ""
 
260
 
261
  clear_key_btn.click(on_clear_key, inputs=[saved_key_state], outputs=[saved_key_state, search_key])
262
 
 
263
  def toggle_raw(visible: bool):
264
  return gr.update(visible=visible), gr.update(visible=visible)
265
 
266
  show_raw_checkbox.change(toggle_raw, inputs=[show_raw_checkbox], outputs=[output_json, kys_output_json])
267
 
 
 
 
 
 
 
 
 
 
268
 
269
  if __name__ == "__main__":
270
  demo.launch()