gkdivya committed on
Commit
1289deb
·
verified ·
1 Parent(s): 6cbbee4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -22
app.py CHANGED
@@ -8,12 +8,11 @@ import json
8
  from tavily import TavilyClient
9
 
10
  KYS_SAMPLE = "https://kys.udiseplus.gov.in/webapp/api/search-schools?searchType=3&searchParam={udise}"
11
- STATES = [
12
  "Arunachal_pradesh",
13
  "Assam",
14
  "Bihar",
15
  "Chhattisgarh",
16
- "Delhi",
17
  "Jharkhand",
18
  "Karnataka",
19
  "Madhya pradesh",
@@ -25,7 +24,6 @@ STATES = [
25
  "Puducherry",
26
  "Rajasthan",
27
  "Sikkim",
28
- "Tamil nadu",
29
  "Telangana",
30
  "Tripura",
31
  "Uttar pradesh",
@@ -55,21 +53,50 @@ def call_search_sdk(api_key, payload_text):
55
 
56
 
57
  def extract_udise_candidates_from_search(search_json):
 
 
 
 
 
58
  found = set()
59
- def walk(obj):
60
- if isinstance(obj, dict):
61
- for v in obj.values():
62
- walk(v)
63
- elif isinstance(obj, list):
64
- for item in obj:
65
- walk(item)
66
- elif isinstance(obj, str):
67
- s = obj.strip()
68
- tokens = s.replace('-', ' ').split()
69
- for t in tokens:
70
- if t.isdigit() and 6 <= len(t) <= 14:
71
- found.add(t)
72
- walk(search_json)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  return sorted(list(found))
74
 
75
 
@@ -121,7 +148,7 @@ def to_table_from_kys(kys_json):
121
 
122
 
123
  def search_workflow(school_name, state_name, search_key, use_search, use_kys):
124
- out = {"kys": None, "search": None, "suggestions": []}
125
  payload_text = f"{school_name or ''} {state_name or ''} UDISE code".strip()
126
 
127
  if use_search:
@@ -130,6 +157,8 @@ def search_workflow(school_name, state_name, search_key, use_search, use_kys):
130
  if search_res.get("ok"):
131
  candidates = extract_udise_candidates_from_search(search_res["data"])
132
  out["suggestions"] = candidates
 
 
133
  else:
134
  out["search"] = {"ok": False, "error": "Search disabled or SDK not used"}
135
 
@@ -192,10 +221,30 @@ with the combined query `<school> <state> UDISE code`, then optionally calls KYS
192
  if save_key:
193
  new_saved_key = key or saved_key
194
  textbox_value = new_saved_key
195
- # return raw JSON objects (even if hidden) and the table + suggestions
196
- return res.get("search"), tbl, suggestions, new_saved_key, textbox_value, res.get("kys")
197
-
198
- run.click(on_run, inputs=[inp, state_dropdown, search_key, save_key_toggle, use_search, use_kys, saved_key_state], outputs=[output_json, search_table, suggestions_dropdown, saved_key_state, search_key, kys_output_json])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
 
200
  def on_select_suggestion(choice):
201
  return choice or ""
 
8
  from tavily import TavilyClient
9
 
10
  KYS_SAMPLE = "https://kys.udiseplus.gov.in/webapp/api/search-schools?searchType=3&searchParam={udise}"
11
+ states = [
12
  "Arunachal_pradesh",
13
  "Assam",
14
  "Bihar",
15
  "Chhattisgarh",
 
16
  "Jharkhand",
17
  "Karnataka",
18
  "Madhya pradesh",
 
24
  "Puducherry",
25
  "Rajasthan",
26
  "Sikkim",
 
27
  "Telangana",
28
  "Tripura",
29
  "Uttar pradesh",
 
53
 
54
 
55
  def extract_udise_candidates_from_search(search_json):
56
+ """
57
+ Extract UDISE codes from Tavily search results.
58
+ Looks for patterns like "UDISE Code: 12345678901" or "Udise School Code: 12345678901"
59
+ in the content and title fields of search results.
60
+ """
61
  found = set()
62
+
63
+ # Check if we have valid search results
64
+ if not search_json or not isinstance(search_json, dict):
65
+ return []
66
+
67
+ data = search_json.get('data', {})
68
+ results = data.get('results', [])
69
+
70
+ if not isinstance(results, list):
71
+ return []
72
+
73
+ # Patterns to match UDISE codes in text
74
+ patterns = [
75
+ r'UDISE\s*[Cc]ode\s*:?\s*(\d{11})', # Matches "UDISE Code: 12345678901"
76
+ r'Udise\s+School\s+Code\s*:?\s*(\d{11})', # Matches "Udise School Code: 12345678901"
77
+ r'\b(\d{11})\b' # Matches any 11-digit number as fallback
78
+ ]
79
+
80
+ for result in results:
81
+ if not isinstance(result, dict):
82
+ continue
83
+
84
+ # Check both content and title fields
85
+ for field in ['content', 'title']:
86
+ text = result.get(field, '')
87
+ if not isinstance(text, str):
88
+ continue
89
+
90
+ # Try each pattern
91
+ for pattern in patterns:
92
+ import re
93
+ matches = re.findall(pattern, text)
94
+ for match in matches:
95
+ if isinstance(match, tuple): # If pattern has groups
96
+ match = match[0] # Take the first group
97
+ if match.isdigit() and len(match) == 11: # UDISE codes are 11 digits
98
+ found.add(match)
99
+
100
  return sorted(list(found))
101
 
102
 
 
148
 
149
 
150
  def search_workflow(school_name, state_name, search_key, use_search, use_kys):
151
+ out = {"kys": None, "search": None, "suggestions": [], "first_candidate": None}
152
  payload_text = f"{school_name or ''} {state_name or ''} UDISE code".strip()
153
 
154
  if use_search:
 
157
  if search_res.get("ok"):
158
  candidates = extract_udise_candidates_from_search(search_res["data"])
159
  out["suggestions"] = candidates
160
+ if candidates: # If we have candidates, set the first one
161
+ out["first_candidate"] = candidates[0]
162
  else:
163
  out["search"] = {"ok": False, "error": "Search disabled or SDK not used"}
164
 
 
221
  if save_key:
222
  new_saved_key = key or saved_key
223
  textbox_value = new_saved_key
224
+ # Return the first candidate along with other values
225
+ return (
226
+ res.get("search"), # output_json
227
+ tbl, # search_table
228
+ suggestions, # suggestions_dropdown
229
+ res.get("first_candidate", ""), # This will update udise_input
230
+ new_saved_key, # saved_key_state
231
+ textbox_value, # search_key
232
+ res.get("kys") # kys_output_json
233
+ )
234
+
235
+ run.click(
236
+ on_run,
237
+ inputs=[inp, state_dropdown, search_key, save_key_toggle, use_search, use_kys, saved_key_state],
238
+ outputs=[
239
+ output_json,
240
+ search_table,
241
+ suggestions_dropdown,
242
+ udise_input, # This will be updated with first_candidate
243
+ saved_key_state,
244
+ search_key,
245
+ kys_output_json
246
+ ]
247
+ )
248
 
249
  def on_select_suggestion(choice):
250
  return choice or ""