gkdivya committed on
Commit
4842e86
·
verified ·
1 Parent(s): fcf55ea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -108
app.py CHANGED
@@ -4,20 +4,19 @@
4
  import gradio as gr
5
  import requests
6
  import pandas as pd
7
- import re
8
- from typing import Dict, Any, Tuple, List
9
  from tavily import TavilyClient
10
 
11
- # ---------- constants ----------
12
  KYS_SAMPLE = "https://kys.udiseplus.gov.in/webapp/api/search-schools?searchType=3&searchParam={udise}"
13
- STATES = [
14
  "Arunachal_pradesh",
15
  "Assam",
16
  "Bihar",
17
  "Chhattisgarh",
 
18
  "Jharkhand",
19
  "Karnataka",
20
- "Madhya_pradesh",
21
  "Manipur",
22
  "Meghalaya",
23
  "Mizoram",
@@ -26,14 +25,16 @@ STATES = [
26
  "Puducherry",
27
  "Rajasthan",
28
  "Sikkim",
 
29
  "Telangana",
30
  "Tripura",
31
- "Uttar_pradesh",
32
- "Uttarakhand",
33
  ]
34
 
35
- # ---------- network helpers ----------
36
- def call_kys_by_udise(udise_code: str) -> Dict[str, Any]:
 
37
  url = KYS_SAMPLE.format(udise=udise_code)
38
  try:
39
  resp = requests.get(url, timeout=10)
@@ -44,7 +45,7 @@ def call_kys_by_udise(udise_code: str) -> Dict[str, Any]:
44
  return {"ok": False, "error": str(e), "url": url}
45
 
46
 
47
- def call_search_sdk(api_key: str, payload_text: str) -> Dict[str, Any]:
48
  try:
49
  client = TavilyClient(api_key)
50
  resp = client.search(query=payload_text)
@@ -53,58 +54,31 @@ def call_search_sdk(api_key: str, payload_text: str) -> Dict[str, Any]:
53
  return {"ok": False, "error": str(e)}
54
 
55
 
56
- # ---------- UDISE extraction ----------
57
- UDISE_RE = re.compile(r"\b(?:UDISE|Udise|udise|Udise Code|UDISE Code)[:\s]*([0-9]{6,12})\b")
58
- GENERIC_NUM_RE = re.compile(r"\b([0-9]{5,12})\b")
59
-
60
- def normalize_candidate(c: str) -> str:
61
- return c.strip()
62
-
63
-
64
- def extract_udise_code_from_tavily_results(tavily_json: Dict[str, Any]) -> Tuple[List[str], str]:
65
- results = tavily_json.get("data", {}).get("results", []) if isinstance(tavily_json, dict) else []
66
- candidates: List[str] = []
67
- for r in results:
68
- content = " ".join([str(r.get(k, "")) for k in ("content", "title")])
69
- for m in UDISE_RE.finditer(content):
70
- candidates.append(normalize_candidate(m.group(1)))
71
- for m in GENERIC_NUM_RE.finditer(content):
72
- candidates.append(normalize_candidate(m.group(1)))
73
- url = r.get("url", "")
74
- for m in GENERIC_NUM_RE.finditer(url):
75
- candidates.append(normalize_candidate(m.group(1)))
76
-
77
- # dedupe while preserving order
78
- seen = set()
79
- unique = []
80
- for c in candidates:
81
- if c and c not in seen:
82
- seen.add(c)
83
- unique.append(c)
84
-
85
- # heuristics for best
86
- best = ""
87
- for c in unique:
88
- if len(c) >= 10:
89
- best = c
90
- break
91
- if not best and unique:
92
- best = unique[0]
93
- return unique, best
94
-
95
-
96
- def extract_udise_candidates_from_search(search_data: Dict[str, Any]) -> List[str]:
97
- candidates, _ = extract_udise_code_from_tavily_results(search_data)
98
- return candidates
99
-
100
-
101
- # ---------- helpers: JSON -> table ----------
102
- def json_to_table(obj: Any) -> pd.DataFrame:
103
  try:
104
  if isinstance(obj, list):
105
  return pd.json_normalize(obj)
106
  if isinstance(obj, dict):
107
- for k in ("results", "data", "hits", "items", "content"):
108
  if k in obj and isinstance(obj[k], list):
109
  return pd.json_normalize(obj[k])
110
  return pd.json_normalize([obj])
@@ -113,36 +87,40 @@ def json_to_table(obj: Any) -> pd.DataFrame:
113
  return pd.DataFrame()
114
 
115
 
116
- def to_table_from_kys(kys_json: Dict[str, Any]) -> pd.DataFrame:
 
 
 
 
117
  try:
118
  content = None
119
  if isinstance(kys_json, dict):
120
- if isinstance(kys_json.get("data"), dict) and isinstance(kys_json["data"].get("content"), list):
121
- content = kys_json["data"]["content"]
 
 
 
122
  elif isinstance(kys_json.get("content"), list):
123
- content = kys_json["content"]
124
  if not content:
125
  return pd.DataFrame()
126
-
127
- rows = [
128
- {
129
  "School Name": r.get("schoolName"),
130
  "School ID": r.get("schoolId"),
131
  "Pincode": r.get("pincode"),
132
  "State": r.get("stateName"),
133
  "District": r.get("districtName"),
134
- "Management Type": r.get("schMgmtType"),
135
- }
136
- for r in content
137
- ]
138
  return pd.DataFrame(rows)
139
  except Exception as e:
140
  print("to_table_from_kys error:", e)
141
  return pd.DataFrame()
142
 
143
 
144
- # ---------- main workflow ----------
145
- def search_workflow(school_name: str, state_name: str, search_key: str, use_search: bool, use_kys: bool):
146
  out = {"kys": None, "search": None, "suggestions": []}
147
  payload_text = f"{school_name or ''} {state_name or ''} UDISE code".strip()
148
 
@@ -162,23 +140,19 @@ def search_workflow(school_name: str, state_name: str, search_key: str, use_sear
162
  return out
163
 
164
 
165
- # ---------- Gradio UI ----------
166
  with gr.Blocks() as demo:
167
- gr.Markdown("# Search + KYS Lookup (Hugging Face Space)")
 
 
 
 
 
 
 
168
 
169
  with gr.Row():
170
- inp = gr.Textbox(
171
- label="School name or UDISE code",
172
- placeholder="e.g. GOVT SEC SCHOOL DARLONG or 12345678901",
173
- lines=1,
174
- )
175
- state_dropdown = gr.Dropdown(
176
- choices=STATES,
177
- label="State",
178
- value=STATES[0] if STATES else "",
179
- interactive=True,
180
- allow_custom_value=True,
181
- )
182
 
183
  search_key = gr.Textbox(label="Search API Key (required)", placeholder="api-key...", lines=1)
184
  save_key_toggle = gr.Checkbox(value=False, label="Save key in session (keeps key between interactions)")
@@ -190,17 +164,15 @@ with gr.Blocks() as demo:
190
 
191
  run = gr.Button("Search")
192
 
 
193
  show_raw_checkbox = gr.Checkbox(value=False, label="Show raw JSON outputs")
 
194
  output_json = gr.JSON(label="Raw Search Output (JSON)", visible=False)
195
  search_table = gr.DataFrame(headers=None, label="Search results (table)")
196
 
197
  gr.Markdown("### UDISE candidates found in Search results")
198
  suggestions_dropdown = gr.Dropdown(choices=[], label="UDISE candidates (from Search)")
199
- udise_input = gr.Textbox(
200
- label="UDISE to lookup (editable)",
201
- placeholder="Pick a candidate or type a UDISE code...",
202
- lines=1,
203
- )
204
  lookup_btn = gr.Button("Lookup UDISE (Call KYS)")
205
 
206
  kys_output_json = gr.JSON(label="KYS Raw Output", visible=False)
@@ -220,24 +192,10 @@ with gr.Blocks() as demo:
220
  if save_key:
221
  new_saved_key = key or saved_key
222
  textbox_value = new_saved_key
 
 
223
 
224
- suggestions_update = gr.Dropdown.update(
225
- choices=suggestions, value=suggestions[0] if suggestions else None
226
- )
227
- return (
228
- res.get("search"),
229
- tbl,
230
- suggestions_update,
231
- new_saved_key,
232
- textbox_value,
233
- res.get("kys"),
234
- )
235
-
236
- run.click(
237
- on_run,
238
- inputs=[inp, state_dropdown, search_key, save_key_toggle, use_search, use_kys, saved_key_state],
239
- outputs=[output_json, search_table, suggestions_dropdown, saved_key_state, search_key, kys_output_json],
240
- )
241
 
242
  def on_select_suggestion(choice):
243
  return choice or ""
@@ -260,11 +218,21 @@ with gr.Blocks() as demo:
260
 
261
  clear_key_btn.click(on_clear_key, inputs=[saved_key_state], outputs=[saved_key_state, search_key])
262
 
 
263
  def toggle_raw(visible: bool):
264
  return gr.update(visible=visible), gr.update(visible=visible)
265
 
266
  show_raw_checkbox.change(toggle_raw, inputs=[show_raw_checkbox], outputs=[output_json, kys_output_json])
267
 
 
 
 
 
 
 
 
 
 
268
 
269
  if __name__ == "__main__":
270
- demo.launch()
 
4
  import gradio as gr
5
  import requests
6
  import pandas as pd
7
+ import json
 
8
  from tavily import TavilyClient
9
 
 
10
  KYS_SAMPLE = "https://kys.udiseplus.gov.in/webapp/api/search-schools?searchType=3&searchParam={udise}"
11
+ states = [
12
  "Arunachal_pradesh",
13
  "Assam",
14
  "Bihar",
15
  "Chhattisgarh",
16
+ "Delhi",
17
  "Jharkhand",
18
  "Karnataka",
19
+ "Madhya pradesh",
20
  "Manipur",
21
  "Meghalaya",
22
  "Mizoram",
 
25
  "Puducherry",
26
  "Rajasthan",
27
  "Sikkim",
28
+ "Tamil nadu",
29
  "Telangana",
30
  "Tripura",
31
+ "Uttar pradesh",
32
+ "Uttarakhand"
33
  ]
34
 
35
+
36
+
37
+ def call_kys_by_udise(udise_code):
38
  url = KYS_SAMPLE.format(udise=udise_code)
39
  try:
40
  resp = requests.get(url, timeout=10)
 
45
  return {"ok": False, "error": str(e), "url": url}
46
 
47
 
48
+ def call_search_sdk(api_key, payload_text):
49
  try:
50
  client = TavilyClient(api_key)
51
  resp = client.search(query=payload_text)
 
54
  return {"ok": False, "error": str(e)}
55
 
56
 
57
+ def extract_udise_candidates_from_search(search_json):
58
+ found = set()
59
+ def walk(obj):
60
+ if isinstance(obj, dict):
61
+ for v in obj.values():
62
+ walk(v)
63
+ elif isinstance(obj, list):
64
+ for item in obj:
65
+ walk(item)
66
+ elif isinstance(obj, str):
67
+ s = obj.strip()
68
+ tokens = s.replace('-', ' ').split()
69
+ for t in tokens:
70
+ if t.isdigit() and 6 <= len(t) <= 14:
71
+ found.add(t)
72
+ walk(search_json)
73
+ return sorted(list(found))
74
+
75
+
76
+ def json_to_table(obj):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  try:
78
  if isinstance(obj, list):
79
  return pd.json_normalize(obj)
80
  if isinstance(obj, dict):
81
+ for k in ("results", "data", "hits", "items"):
82
  if k in obj and isinstance(obj[k], list):
83
  return pd.json_normalize(obj[k])
84
  return pd.json_normalize([obj])
 
87
  return pd.DataFrame()
88
 
89
 
90
+ def to_table_from_kys(kys_json):
91
+ """
92
+ Convert KYS JSON wrapper into a simplified pandas DataFrame showing only
93
+ selected fields from the `content` list.
94
+ """
95
  try:
96
  content = None
97
  if isinstance(kys_json, dict):
98
+ inner = kys_json.get("data") if kys_json.get("data") is not None else None
99
+ if isinstance(inner, dict) and isinstance(inner.get("content"), list):
100
+ content = inner.get("content")
101
+ elif isinstance(inner, dict) and isinstance(inner.get("data"), dict) and isinstance(inner.get("data").get("content"), list):
102
+ content = inner.get("data").get("content")
103
  elif isinstance(kys_json.get("content"), list):
104
+ content = kys_json.get("content")
105
  if not content:
106
  return pd.DataFrame()
107
+ rows = []
108
+ for r in content:
109
+ rows.append({
110
  "School Name": r.get("schoolName"),
111
  "School ID": r.get("schoolId"),
112
  "Pincode": r.get("pincode"),
113
  "State": r.get("stateName"),
114
  "District": r.get("districtName"),
115
+ "Management Type": r.get("schMgmtType")
116
+ })
 
 
117
  return pd.DataFrame(rows)
118
  except Exception as e:
119
  print("to_table_from_kys error:", e)
120
  return pd.DataFrame()
121
 
122
 
123
+ def search_workflow(school_name, state_name, search_key, use_search, use_kys):
 
124
  out = {"kys": None, "search": None, "suggestions": []}
125
  payload_text = f"{school_name or ''} {state_name or ''} UDISE code".strip()
126
 
 
140
  return out
141
 
142
 
 
143
  with gr.Blocks() as demo:
144
+ gr.Markdown(
145
+ """
146
+ # Search + KYS Lookup (Hugging Face Space)
147
+ This version uses the Search SDK internally. Provide your API key in the textbox.
148
+ Enter a school name (or UDISE code) and select the state; the app calls Search SDK
149
+ with the combined query `<school> <state> UDISE code`, then optionally calls KYS.
150
+ """
151
+ )
152
 
153
  with gr.Row():
154
+ inp = gr.Textbox(label="School name or UDISE code", placeholder="e.g. GOVT SEC SCHOOL DARLONG or 12345678901", lines=1)
155
+ state_dropdown = gr.Dropdown(choices=STATES, label="State", value=STATES[0] if STATES else "", interactive=True, allow_custom_value=True)
 
 
 
 
 
 
 
 
 
 
156
 
157
  search_key = gr.Textbox(label="Search API Key (required)", placeholder="api-key...", lines=1)
158
  save_key_toggle = gr.Checkbox(value=False, label="Save key in session (keeps key between interactions)")
 
164
 
165
  run = gr.Button("Search")
166
 
167
+ # By default hide raw JSON outputs; users can toggle visibility with `show_raw_checkbox`
168
  show_raw_checkbox = gr.Checkbox(value=False, label="Show raw JSON outputs")
169
+
170
  output_json = gr.JSON(label="Raw Search Output (JSON)", visible=False)
171
  search_table = gr.DataFrame(headers=None, label="Search results (table)")
172
 
173
  gr.Markdown("### UDISE candidates found in Search results")
174
  suggestions_dropdown = gr.Dropdown(choices=[], label="UDISE candidates (from Search)")
175
+ udise_input = gr.Textbox(label="UDISE to lookup (editable)", placeholder="Pick a candidate or type a UDISE code...", lines=1)
 
 
 
 
176
  lookup_btn = gr.Button("Lookup UDISE (Call KYS)")
177
 
178
  kys_output_json = gr.JSON(label="KYS Raw Output", visible=False)
 
192
  if save_key:
193
  new_saved_key = key or saved_key
194
  textbox_value = new_saved_key
195
+ # return raw JSON objects (even if hidden) and the table + suggestions
196
+ return res.get("search"), tbl, suggestions, new_saved_key, textbox_value, res.get("kys")
197
 
198
+ run.click(on_run, inputs=[inp, state_dropdown, search_key, save_key_toggle, use_search, use_kys, saved_key_state], outputs=[output_json, search_table, suggestions_dropdown, saved_key_state, search_key, kys_output_json])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
 
200
  def on_select_suggestion(choice):
201
  return choice or ""
 
218
 
219
  clear_key_btn.click(on_clear_key, inputs=[saved_key_state], outputs=[saved_key_state, search_key])
220
 
221
+ # Toggle visibility handler for raw JSON outputs
222
  def toggle_raw(visible: bool):
223
  return gr.update(visible=visible), gr.update(visible=visible)
224
 
225
  show_raw_checkbox.change(toggle_raw, inputs=[show_raw_checkbox], outputs=[output_json, kys_output_json])
226
 
227
+ gr.Markdown(
228
+ """
229
+ ---
230
+ **Notes:**
231
+ - The 'Save key in session' toggle keeps the key active for the current session.
232
+ - The key is stored only in-memory (not written to disk) and is not logged.
233
+ - Use 'Clear saved key' to remove it from the session.
234
+ """
235
+ )
236
 
237
  if __name__ == "__main__":
238
+ demo.launch()