Spaces:
Sleeping
Sleeping
| # search_kys_space.py | |
| # Gradio app for Hugging Face Spaces (uses Search API via Tavily SDK internally) | |
| import gradio as gr | |
| import requests | |
| import pandas as pd | |
| import json | |
| import re | |
| from difflib import get_close_matches | |
| from tavily import TavilyClient | |
# URL template for the KYS school-search endpoint; `{udise}` is replaced with
# the code being looked up.
KYS_SAMPLE = "https://kys.udiseplus.gov.in/webapp/api/search-schools?searchType=3&searchParam={udise}"

# Mapping of state names to UDISE state codes (the first two digits of a
# school's UDISE code).
STATE_TO_UDISE_CODE = {
    'Jammu & Kashmir': '01',
    'Himachal Pradesh': '02',
    'Punjab': '03',
    'Chandigarh': '04',
    'Uttarakhand': '05',
    'Haryana': '06',
    'Delhi': '07',
    'Rajasthan': '08',
    'Uttar Pradesh': '09',
    'Bihar': '10',
    'Sikkim': '11',
    'Arunachal Pradesh': '12',
    'Nagaland': '13',
    'Manipur': '14',
    'Mizoram': '15',
    'Tripura': '16',
    'Meghalaya': '17',
    'Assam': '18',
    'West Bengal': '19',
    'Jharkhand': '20',
    'Odisha': '21',
    'Chhattisgarh': '22',
    'Madhya Pradesh': '23',
    'Gujarat': '24',
    'Daman & Diu': '25',
    'Dadra & Nagar Haveli': '26',
    'Maharashtra': '27',
    'Andhra Pradesh': '28',
    'Karnataka': '29',
    'Goa': '30',
    'Lakshadweep': '31',
    'Kerala': '32',
    'Tamil Nadu': '33',
    'Puducherry': '34',
    'Andaman & Nicobar Islands': '35',
    'Telangana': '36',
    'Ladakh': '37',
}

# For backward compatibility
VALID_UDISE_STATE_CODES = list(STATE_TO_UDISE_CODE.values())


def is_valid_udise(code, state_name=None):
    """
    Check if a string is a valid UDISE code.

    A code is accepted when it is a string of exactly 11 ASCII digits whose
    first two digits are a known state code. When ``state_name`` is given,
    those two digits must also be the code registered for that state.

    Args:
        code: The UDISE code to validate. Anything that is not a ``str``
            (``None``, ``int``, ...) is rejected rather than raising.
        state_name: Optional state name to validate against the UDISE state
            code. Matching ignores case, surrounding/repeated whitespace and
            treats underscores as spaces (``"Arunachal_pradesh"`` matches
            ``"Arunachal Pradesh"``).

    Returns:
        bool: True if the code is valid, False otherwise. An unknown
        ``state_name`` yields False (and prints a warning).
    """
    # Basic validation. `isascii()` is required because `str.isdigit()` alone
    # also accepts non-ASCII digits (e.g. Arabic-Indic), which are never part
    # of a real UDISE code.
    if not (
        isinstance(code, str)
        and len(code) == 11
        and code.isascii()
        and code.isdigit()
    ):
        return False

    state_code = code[:2]

    # Check if state code is valid
    if state_code not in VALID_UDISE_STATE_CODES:
        return False

    # If state_name is provided, validate against it
    if state_name:
        # Normalise 'Uttar pradesh' / 'uttar_pradesh' / ' Uttar   Pradesh '
        # to the 'Uttar Pradesh' spelling used as keys in the mapping.
        state_name = " ".join(state_name.replace('_', ' ').split()).title()
        expected_code = STATE_TO_UDISE_CODE.get(state_name)
        if not expected_code:
            print(f"Warning: Unknown state name: {state_name}")
            return False
        if state_code != expected_code:
            print(f"UDISE code {code} state code {state_code} does not match expected state {state_name} ({expected_code})")
            return False

    return True
# States offered in the UI dropdown. The selected value is shown to the user
# and is also embedded verbatim in the search query, so the names use the same
# title-cased, space-separated spelling as the rest of the list (no
# underscores or lower-cased second words).
STATES = [
    "Arunachal Pradesh",
    "Assam",
    "Bihar",
    "Chhattisgarh",
    "Jharkhand",
    "Karnataka",
    "Madhya Pradesh",
    "Manipur",
    "Meghalaya",
    "Mizoram",
    "Nagaland",
    "Odisha",
    "Puducherry",
    "Rajasthan",
    "Sikkim",
    "Telangana",
    "Tripura",
    "Uttar Pradesh",
    "Uttarakhand",
]
def call_kys_by_udise(udise_code):
    """
    Query the KYS search endpoint for one UDISE code.

    Args:
        udise_code: Value substituted into the ``KYS_SAMPLE`` URL template.

    Returns:
        dict: ``{"ok": True, "url": ..., "data": <decoded JSON>}`` on success,
        or ``{"ok": False, "error": <message>, "url": ...}`` when the request,
        the HTTP status check or the JSON decoding fails. Never raises for
        those failures.
    """
    url = KYS_SAMPLE.format(udise=udise_code)
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        payload = response.json()
    except Exception as exc:
        # Report the failure to the caller instead of propagating it.
        return {"ok": False, "error": str(exc), "url": url}
    return {"ok": True, "url": url, "data": payload}
def call_search_sdk(api_key, payload_text):
    """
    Run a web search through the Tavily SDK.

    Args:
        api_key: Key handed to ``TavilyClient``.
        payload_text: Query string passed to ``client.search``.

    Returns:
        dict: ``{"ok": True, "data": <SDK response>}`` on success, or
        ``{"ok": False, "error": <message>}`` if creating the client or the
        search call raises.
    """
    try:
        response = TavilyClient(api_key).search(query=payload_text)
    except Exception as exc:
        # Any SDK/client failure is reported as a result, not raised.
        return {"ok": False, "error": str(exc)}
    return {"ok": True, "data": response}
# Patterns to match UDISE codes, tried in order for every search result.
# Compiled once at import time instead of on every result.
_UDISE_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r'UDISE[^\d]*(?:code|Code|CODE)[^\d]*(\d{11})(?![0-9])',
        r'Udise[^\d]*(?:School[^\d]*Code|Code)[^\d]*(\d{11})(?![0-9])',
        r'(?<![0-9])(\d{11})(?![0-9])',  # Fallback: any 11-digit number
    )
)

# Words that may end a school-like phrase. Order matters: regex alternation
# takes the first alternative that lets the whole pattern match.
_SCHOOL_NAME_SUFFIXES = (
    'School', 'High School', 'Vidyalaya', 'Vidyalayam', 'Vidhya', 'Vidya',
    'Public School', 'Govt', 'Government', 'Kendriya', 'Jawahar', 'Navodaya',
    'Sainik', 'Army', 'Air Force', 'Navy', 'Central School', 'CBSE', 'ICSE',
    'State Board', 'EM', 'TM', 'Primary', 'Upper Primary', 'Higher Secondary',
    'HSS', 'HS', 'UPS', 'PS',
)
_SCHOOL_NAME_RE = re.compile(
    r'([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+\s*(?:'
    + '|'.join(_SCHOOL_NAME_SUFFIXES)
    + r'))',
    re.IGNORECASE,
)


def extract_udise_candidates_from_search(search_json, state_name=None, search_query=None):
    """
    Extract UDISE codes and school information from Tavily search results.

    Each result's title and content are scanned for 11-digit UDISE codes.
    Every distinct code that passes ``is_valid_udise`` (optionally against
    ``state_name``) produces one entry.

    Args:
        search_json: JSON response from Tavily search. Results are read from
            ``search_json["results"]`` or, failing that, from
            ``search_json["data"]["results"]``.
        state_name: Optional state name to validate UDISE codes against
        search_query: Original search query; when given, entries whose school
            name is a close fuzzy match to it are listed first.

    Returns:
        list: List of dictionaries with keys ``udise``, ``name``, ``source``
        (result URL) and ``snippet`` (first 200 characters of the content).
        Empty when the input is unusable or no valid code is found.
    """
    print("\n===== Extracting UDISE Codes =====")
    if state_name:
        print(f"Validating UDISE codes against state: {state_name}")

    # Check if we have valid search results
    if not search_json or not isinstance(search_json, dict):
        print("Invalid search JSON")
        return []

    results = search_json.get('results')
    if not results:
        # Fall back to a nested payload, tolerating a missing/non-dict "data".
        nested = search_json.get('data')
        results = nested.get('results') if isinstance(nested, dict) else None
    if not results or not isinstance(results, (list, tuple)):
        print("No results found in search JSON")
        return []
    print(f"Found {len(results)} search results")

    found_codes = set()
    school_info = []  # List to store school information

    for result in results:
        if not isinstance(result, dict):
            continue

        # Get title and content; a key present with a None value is treated
        # like a missing key so the string operations below cannot fail.
        title = str(result.get('title') or '')
        content = str(result.get('content') or '')
        url = result.get('url', '')
        text = f"{title} {content}"

        school_name = None  # resolved lazily, once per result

        # Check for UDISE codes using all patterns
        for pattern in _UDISE_PATTERNS:
            for match in pattern.finditer(text):
                udise_code = match.group(1)
                if udise_code in found_codes:
                    continue
                if not is_valid_udise(udise_code, state_name):
                    continue
                print(f"Found valid UDISE code: {udise_code}")
                found_codes.add(udise_code)

                if school_name is None:
                    school_name = _guess_school_name(title, content)

                school_info.append({
                    'udise': udise_code,
                    'name': school_name,
                    'source': url,
                    'snippet': content[:200] + '...' if len(content) > 200 else content,
                })

    if not school_info:
        print("No valid school information found with UDISE codes")
        return []

    # If we have a search query, sort results by relevance to the query
    if search_query:
        school_info = _rank_by_query(school_info, search_query)

    return school_info


def _guess_school_name(title, content):
    """Return the title, or a school-like phrase from the content when the title looks generic."""
    # If title is too short or doesn't seem like a school name, try to find
    # a better match in the content.
    title_lower = title.lower()
    looks_generic = len(title.split()) < 2 or any(
        word in title_lower for word in ('udise', 'code', 'school')
    )
    if looks_generic:
        found = _SCHOOL_NAME_RE.search(content)
        if found:
            # Use the whole matched phrase (not just its first character).
            return found.group(1).strip()
    return title


def _rank_by_query(school_info, search_query):
    """Order entries by fuzzy similarity of their name to the query; non-matches keep their order at the end."""
    # Group by lowercased name so that different schools sharing a name are
    # all kept instead of overwriting each other.
    by_name = {}
    for info in school_info:
        by_name.setdefault(info['name'].lower(), []).append(info)

    ranked_names = get_close_matches(
        search_query.lower(),
        list(by_name),
        n=len(by_name),
        cutoff=0.3,  # Lower cutoff to allow more fuzzy matches
    )

    ordered = []
    for name in ranked_names:
        ordered.extend(by_name.pop(name))
    # Add any remaining schools that didn't match the fuzzy search
    for leftovers in by_name.values():
        ordered.extend(leftovers)
    return ordered
def json_to_table(obj):
    """
    Flatten a JSON-like object into a pandas DataFrame.

    A list is normalised directly. For a dict, the first of the keys
    ``results``, ``data``, ``hits``, ``items`` that holds a list is
    normalised; if none does, the dict itself becomes a single row.
    Any other input, or a failure during normalisation, gives an empty
    DataFrame.
    """
    if isinstance(obj, list):
        records = obj
    elif isinstance(obj, dict):
        list_keys = ("results", "data", "hits", "items")
        records = next(
            (obj[key] for key in list_keys if isinstance(obj.get(key), list)),
            [obj],
        )
    else:
        return pd.DataFrame()

    try:
        return pd.json_normalize(records)
    except Exception:
        # Best effort: unparseable input is shown as an empty table.
        return pd.DataFrame()
def to_table_from_kys(kys_json):
    """
    Build a compact DataFrame from a KYS JSON payload.

    The list of school records is taken from the first of these locations
    that holds a list: ``kys_json["data"]["content"]``,
    ``kys_json["data"]["data"]["content"]``, ``kys_json["content"]``.
    Only the name, id, pincode, state, district and management-type fields
    of each record are kept. An empty DataFrame is returned when no records
    are found or when anything goes wrong (the error is printed).
    """
    try:
        records = None
        if isinstance(kys_json, dict):
            candidates = []
            inner = kys_json.get("data")
            if isinstance(inner, dict):
                candidates.append(inner.get("content"))
                nested = inner.get("data")
                if isinstance(nested, dict):
                    candidates.append(nested.get("content"))
            candidates.append(kys_json.get("content"))
            # First location that actually holds a list wins.
            records = next((c for c in candidates if isinstance(c, list)), None)

        if not records:
            return pd.DataFrame()

        column_to_field = (
            ("School Name", "schoolName"),
            ("School ID", "schoolId"),
            ("Pincode", "pincode"),
            ("State", "stateName"),
            ("District", "districtName"),
            ("Management Type", "schMgmtType"),
        )
        table_rows = [
            {column: record.get(field) for column, field in column_to_field}
            for record in records
        ]
        return pd.DataFrame(table_rows)
    except Exception as e:
        print("to_table_from_kys error:", e)
        return pd.DataFrame()
def search_workflow(school_name, state_name, search_key, use_search=True, use_kys=True):
    """
    Run the web search and, for numeric input, the direct KYS lookup.

    Args:
        school_name: School name or a numeric code typed by the user.
        state_name: State used in the search query and for code validation.
        search_key: API key forwarded to ``call_search_sdk``.
        use_search: When False the web search is skipped and ``"search"``
            holds an error stub instead.
        use_kys: When True and ``school_name`` is 6-14 digits, the KYS
            endpoint is called with it.

    Returns:
        dict with keys ``kys``, ``search``, ``suggestions``,
        ``first_candidate`` and ``school_info``.
    """
    outcome = {"kys": None, "search": None, "suggestions": [], "first_candidate": None, "school_info": []}
    query_text = f"{school_name or ''} {state_name or ''} UDISE code".strip()

    if not use_search:
        outcome["search"] = {"ok": False, "error": "Search disabled or SDK not used"}
    else:
        search_result = call_search_sdk(search_key, query_text)
        outcome["search"] = search_result
        if search_result.get("ok"):
            # school_name drives the fuzzy ordering, state_name the validation
            schools = extract_udise_candidates_from_search(
                search_result["data"],
                state_name=state_name,
                search_query=school_name,
            )
            outcome["suggestions"] = [
                f"{entry['name']} (UDISE: {entry['udise']})" for entry in schools
            ]
            outcome["school_info"] = schools
            # Only the codes are needed to pick the first candidate.
            codes = [entry['udise'] for entry in schools]
            if codes:
                top_code = codes[0]
                if top_code != "No UDISE codes found":
                    outcome["first_candidate"] = top_code

    if use_kys and school_name:
        trimmed = school_name.strip()
        if trimmed.isdigit() and 6 <= len(trimmed) <= 14:
            outcome["kys"] = call_kys_by_udise(trimmed)

    return outcome
# Gradio UI: a search form, the search results, a list of UDISE candidates
# and a follow-up KYS lookup for the chosen code.
with gr.Blocks() as demo:
    gr.Markdown(
        """
# Find School UDISE Code
Provide your API key in the textbox.
Enter a school name and select the state
"""
    )
    # NOTE(review): the original indentation was lost; the row is assumed to
    # hold the query box and the state dropdown only - confirm the layout.
    with gr.Row():
        inp = gr.Textbox(label="School name or UDISE code", placeholder="e.g. GOVT SEC SCHOOL DARLONG or 12345678901", lines=1)
        state_dropdown = gr.Dropdown(choices=STATES, label="State", value=STATES[0] if STATES else "", interactive=True, allow_custom_value=True)
    # The key is a secret, so mask it while typing.
    search_key = gr.Textbox(label="Search API Key (required)", placeholder="api-key...", lines=1, type="password")
    run = gr.Button("Search", variant="primary")

    # By default hide raw JSON outputs; users can toggle visibility with `show_raw_checkbox`
    show_raw_checkbox = gr.Checkbox(value=False, label="Show raw JSON outputs")
    output_json = gr.JSON(label="Raw Search Output (JSON)", visible=False)
    search_table = gr.DataFrame(headers=None, label="Search results (table)")

    gr.Markdown("### UDISE candidates found in Search results")
    suggestions_dropdown = gr.Dropdown(choices=[], label="UDISE candidates (from Search)")
    udise_input = gr.Textbox(label="UDISE to lookup (editable)", placeholder="Pick a candidate or type a UDISE code...", lines=1)
    lookup_btn = gr.Button("Lookup UDISE (Call KYS)")
    kys_output_json = gr.JSON(label="KYS Raw Output", visible=False)
    kys_table = gr.DataFrame(headers=None, label="KYS results (table)")

    # Remembers the last API key used in this session.
    saved_key_state = gr.State("")

    def on_run(school, state, key, saved_key):
        """Run the search and return values for the six outputs wired to `run.click`."""
        # A key typed into the textbox always wins; the remembered key is only
        # a fallback. (Preferring the remembered key would ignore a corrected
        # key on the first click after a bad one was saved.)
        effective_key = key or saved_key

        # Always enable both search and KYS by default
        res = search_workflow(school, state, effective_key, use_search=True, use_kys=True)

        search_res = res.get("search")
        tbl = pd.DataFrame()
        if search_res and search_res.get("ok"):
            tbl = json_to_table(search_res["data"])

        school_info = res.get("school_info") or []
        if school_info:
            # Format suggestions as "School Name (UDISE: 12345678901)"
            suggestions = [
                f"{info['name']} (UDISE: {info['udise']})"
                for info in school_info
            ]
            first_candidate = school_info[0]['udise']
        else:
            suggestions = ["No matching schools found"]
            first_candidate = ""

        return (
            search_res,                       # output_json
            tbl,                              # search_table
            gr.update(choices=suggestions),   # suggestions_dropdown choices
            first_candidate,                  # udise_input
            effective_key,                    # saved_key_state
            res.get("kys"),                   # kys_output_json
        )

    run.click(
        on_run,
        inputs=[inp, state_dropdown, search_key, saved_key_state],
        outputs=[
            output_json,
            search_table,
            suggestions_dropdown,
            udise_input,  # This will be updated with first_candidate
            saved_key_state,
            kys_output_json,
        ],
    )

    def on_select_suggestion(choice):
        """Return the UDISE code embedded in a dropdown choice, or "" if there is none."""
        if not choice or choice in ["No matching schools found", "No UDISE codes found"]:
            return ""
        # Extract UDISE code from the format "School Name (UDISE: 12345678901)"
        match = re.search(r'\(UDISE:\s*(\d+)\)', choice)
        if match:
            return match.group(1)
        return ""

    suggestions_dropdown.change(
        on_select_suggestion,
        inputs=[suggestions_dropdown],
        outputs=[udise_input],
    )

    def on_lookup_udise(udise_code):
        """Call KYS for the given code and return (raw result, table)."""
        code = (udise_code or "").strip()
        # Enforce the same 6-14 digit rule the error message states and that
        # `search_workflow` applies before calling KYS.
        if not (code.isdigit() and 6 <= len(code) <= 14):
            return {"ok": False, "error": "Provide a numeric UDISE code (6-14 digits)."}, pd.DataFrame()
        kys_res = call_kys_by_udise(code)
        df = pd.DataFrame()
        if kys_res.get("ok") and kys_res.get("data"):
            df = to_table_from_kys(kys_res["data"])
        return kys_res, df

    lookup_btn.click(on_lookup_udise, inputs=[udise_input], outputs=[kys_output_json, kys_table])

    # Toggle visibility handler for raw JSON outputs
    def toggle_raw(visible: bool):
        """Show or hide both raw-JSON panels together."""
        return gr.update(visible=visible), gr.update(visible=visible)

    show_raw_checkbox.change(toggle_raw, inputs=[show_raw_checkbox], outputs=[output_json, kys_output_json])

if __name__ == "__main__":
    demo.launch()