# ====================================================
# web_search.py
# ====================================================
"""
Minimal Tavily wrapper: run a web search and return a list of UDISE codes.

Enhancement:
- Optional village support for more precise queries
"""

import logging
import os
import re
from typing import Any, Dict, Iterable, List, Optional

# optional Tavily SDK
try:
    from tavily import TavilyClient
except Exception:
    TavilyClient = None

# Private name so that `from web_search import *` does not export a logger.
_logger = logging.getLogger(__name__)

# ----------------------------------------------------
# State → UDISE prefix mapping
# ----------------------------------------------------
STATE_TO_UDISE_CODE = {
    "Jammu & Kashmir": "01",
    "Himachal Pradesh": "02",
    "Punjab": "03",
    "Chandigarh": "04",
    "Uttarakhand": "05",
    "Haryana": "06",
    "Delhi": "07",
    "Rajasthan": "08",
    "Uttar Pradesh": "09",
    "Bihar": "10",
    "Sikkim": "11",
    "Arunachal Pradesh": "12",
    "Nagaland": "13",
    "Manipur": "14",
    "Mizoram": "15",
    "Tripura": "16",
    "Meghalaya": "17",
    "Assam": "18",
    "West Bengal": "19",
    "Jharkhand": "20",
    "Odisha": "21",
    "Chhattisgarh": "22",
    "Madhya Pradesh": "23",
    "Gujarat": "24",
    "Daman & Diu": "25",
    "Dadra & Nagar Haveli": "26",
    "Maharashtra": "27",
    "Andhra Pradesh": "28",
    "Karnataka": "29",
    "Goa": "30",
    "Lakshadweep": "31",
    "Kerala": "32",
    "Tamil Nadu": "33",
    "Puducherry": "34",
    "Andaman & Nicobar Islands": "35",
    "Telangana": "36",
    "Ladakh": "37",
}

# strict 11-digit UDISE match: exactly eleven digits, not embedded in a
# longer digit run; the code itself is capture group 1.
# NOTE(review): the pattern text was truncated in the reviewed source and has
# been reconstructed from the "strict 11-digit" comment and the `group(1)`
# usage below - confirm against the original file.
_UDISE_RE = re.compile(r"(?<!\d)(\d{11})(?!\d)")

# Standalone word "and" (never the "And" inside "Andhra" / "Andaman").
_AND_WORD_RE = re.compile(r"\band\b", re.IGNORECASE)
_NON_ALPHA_RE = re.compile(r"[^A-Za-z]")

# Fields of a result item that are scanned for UDISE codes.
_RESULT_TEXT_FIELDS = ("title", "content", "text", "url")


# ----------------------------------------------------
# Helpers
# ----------------------------------------------------
# NOTE(review): the `def` line of `_build_query` was truncated in the reviewed
# source; parameter order is taken from the call in `tavily_search_codes`, the
# defaults are an assumption - confirm against the original file.
def _build_query(
    school_name: Optional[str],
    state_name: Optional[str] = None,
    district: Optional[str] = None,
    village: Optional[str] = None,
) -> str:
    """Build the natural-language search query for a school's UDISE code.

    Each field that is non-empty after stripping whitespace contributes one
    labelled fragment, in the fixed order school, village, district, state.
    """
    parts = ["UDISE code of"]
    labelled = (
        ("School", school_name),
        ("in village", village),
        ("district", district),
        ("state", state_name),
    )
    for label, value in labelled:
        cleaned = value.strip() if value else ""
        if cleaned:  # skip None, "" and whitespace-only values
            parts.append(f"{label} {cleaned}")
    return " ".join(parts)


def _call_tavily(api_key: Optional[str], query: str) -> Dict[str, Any]:
    """Run ``query`` through Tavily and wrap the outcome in a status dict.

    Returns ``{"ok": True, "data": <response>}`` on success and
    ``{"ok": False, "error": <message>}`` when no API key is available, the
    ``tavily`` package is missing, or the SDK raises.
    """
    key = api_key or os.getenv("TAVILY_API_KEY")
    if not key:
        return {"ok": False, "error": "No Tavily API key provided."}
    if TavilyClient is None:
        return {"ok": False, "error": "tavily package not installed."}

    _logger.debug("Tavily query: %s", query)
    try:
        client = TavilyClient(key)
        resp = client.search(query=query, country="india")
    except Exception as e:
        # Best-effort boundary: callers get an error dict, never an exception.
        _logger.warning("Tavily search failed: %s", e)
        return {"ok": False, "error": str(e)}
    _logger.debug("Tavily response: %r", resp)
    return {"ok": True, "data": resp}


def _state_fingerprint(name: str) -> str:
    """Reduce a state name to lowercase letters, ignoring the word "and"."""
    without_and = _AND_WORD_RE.sub(" ", name)
    return _NON_ALPHA_RE.sub("", without_and).lower()


def _normalize_state_key(state_name: Optional[str]) -> Optional[str]:
    """Map a free-form state name to its key in ``STATE_TO_UDISE_CODE``.

    Matching ignores case, spacing, punctuation and the standalone word
    "and", so "jammu and kashmir", "Jammu & Kashmir" and "JammuKashmir" all
    resolve to "Jammu & Kashmir". Returns ``None`` when nothing matches.
    """
    if not state_name:
        return None
    wanted = _state_fingerprint(state_name)
    for key in STATE_TO_UDISE_CODE:
        if _state_fingerprint(key) == wanted:
            return key
    return None


def _collect_snippets(raw: Any) -> List[str]:
    """Flatten a search response into a list of text snippets to scan.

    A dict is searched for a result list under ``results``,
    ``data.results`` or ``items``; a list is stringified element-wise; any
    other value becomes a single snippet.
    """
    if isinstance(raw, list):
        return [str(entry) for entry in raw]
    if not isinstance(raw, dict):
        return [str(raw)]

    nested = raw.get("data")
    # Guard: "data" may be present but not a dict (e.g. None or a list).
    nested_results = nested.get("results") if isinstance(nested, dict) else None
    candidates = raw.get("results") or nested_results or raw.get("items") or []
    if not isinstance(candidates, (list, tuple)):
        candidates = [candidates]

    snippets = []
    for item in candidates:
        if isinstance(item, dict):
            snippets.append(
                " ".join(
                    str(item[field])
                    for field in _RESULT_TEXT_FIELDS
                    if item.get(field) is not None
                )
            )
        else:
            snippets.append(str(item))
    return snippets


def _extract_codes(
    snippets: Iterable[str], allowed_prefix: Optional[str] = None
) -> List[str]:
    """Return unique UDISE codes found in ``snippets``, in first-seen order.

    A code is kept only if its first two digits are a known state prefix
    and, when ``allowed_prefix`` is given, equal to that prefix.
    """
    # Built once per call instead of scanning dict values for every match.
    valid_prefixes = set(STATE_TO_UDISE_CODE.values())
    found: List[str] = []
    seen = set()
    for text in snippets:
        for code in _UDISE_RE.findall(text):
            if code in seen:
                continue
            prefix = code[:2]
            if prefix not in valid_prefixes:
                continue
            if allowed_prefix and prefix != allowed_prefix:
                continue
            seen.add(code)
            found.append(code)
    return found


# ----------------------------------------------------
# Public API
# ----------------------------------------------------
def tavily_search_codes(
    school_name: Optional[str],
    state_name: Optional[str] = None,
    district: Optional[str] = None,
    village: Optional[str] = None,
    api_key: Optional[str] = None,
    enforce_state_prefix: bool = True,
) -> List[str]:
    """
    Perform a Tavily search and return a list of unique UDISE codes.
    Village is used only to improve search precision.

    Args:
        school_name: Name of the school; an empty or blank name yields ``[]``.
        state_name: Optional state; also drives prefix filtering (see below).
        district: Optional district added to the query.
        village: Optional village added to the query.
        api_key: Tavily API key; falls back to ``TAVILY_API_KEY`` in the
            environment.
        enforce_state_prefix: When true and ``state_name`` resolves to a known
            state, only codes starting with that state's prefix are returned.

    Returns:
        Unique 11-digit codes in first-seen order; ``[]`` when the search
        cannot be run or fails.
    """
    if not school_name or not school_name.strip():
        return []

    query = _build_query(school_name, state_name, district, village)
    call = _call_tavily(api_key, query)
    if not call.get("ok"):
        return []

    snippets = _collect_snippets(call.get("data") or {})

    allowed_prefix = None
    state_key = _normalize_state_key(state_name)
    if enforce_state_prefix and state_key:
        allowed_prefix = STATE_TO_UDISE_CODE.get(state_key)

    return _extract_codes(snippets, allowed_prefix)