Spaces:
Running
Running
File size: 5,208 Bytes
ef31980 868e976 ef31980 f930d0c ef31980 f930d0c ef31980 f930d0c ef31980 f930d0c ef31980 f930d0c ef31980 f930d0c 0767317 ef31980 0767317 ef31980 f930d0c ef31980 f930d0c ef31980 f930d0c 45d3479 0767317 45d3479 f930d0c ef31980 f930d0c ef31980 f930d0c ef31980 f930d0c ef31980 f930d0c ef31980 f930d0c ef31980 0767317 ef31980 f930d0c ef31980 f930d0c ef31980 f930d0c ef31980 f930d0c ef31980 f930d0c ef31980 f930d0c ef31980 f930d0c ef31980 f930d0c ef31980 f930d0c ef31980 f930d0c ef31980 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 |
# ====================================================
# web_search.py
# ====================================================
"""
Minimal Tavily wrapper: run a web search and return a list of UDISE codes.
Enhancement:
- Optional village support for more precise queries
"""
import logging
import os
import re
from typing import List, Optional

# optional Tavily SDK
try:
    from tavily import TavilyClient
except Exception:
    TavilyClient = None
# ----------------------------------------------------
# State → UDISE prefix mapping
# ----------------------------------------------------
# Maps each state / union-territory name to the two-digit prefix that its
# UDISE codes start with. Keys use "&" (not "and") in compound names.
# NOTE(review): the merged UT "Dadra & Nagar Haveli and Daman & Diu" may have
# been given its own prefix in newer UDISE+ data — confirm and add if so.
STATE_TO_UDISE_CODE = {
    "Jammu & Kashmir": "01",
    "Himachal Pradesh": "02",
    "Punjab": "03",
    "Chandigarh": "04",
    "Uttarakhand": "05",
    "Haryana": "06",
    "Delhi": "07",
    "Rajasthan": "08",
    "Uttar Pradesh": "09",
    "Bihar": "10",
    "Sikkim": "11",
    "Arunachal Pradesh": "12",
    "Nagaland": "13",
    "Manipur": "14",
    "Mizoram": "15",
    "Tripura": "16",
    "Meghalaya": "17",
    "Assam": "18",
    "West Bengal": "19",
    "Jharkhand": "20",
    "Odisha": "21",
    "Chhattisgarh": "22",
    "Madhya Pradesh": "23",
    "Gujarat": "24",
    "Daman & Diu": "25",
    "Dadra & Nagar Haveli": "26",
    "Maharashtra": "27",
    "Andhra Pradesh": "28",
    "Karnataka": "29",
    "Goa": "30",
    "Lakshadweep": "31",
    "Kerala": "32",
    "Tamil Nadu": "33",
    "Puducherry": "34",
    "Andaman & Nicobar Islands": "35",
    "Telangana": "36",
    "Ladakh": "37",
}
# Strict 11-digit UDISE match: captures a run of exactly 11 digits. The
# lookbehind / lookahead reject candidates that are part of a longer digit run.
_UDISE_RE = re.compile(r"(?<!\d)(\d{11})(?!\d)")
# ----------------------------------------------------
# Query builder (Village-aware)
# ----------------------------------------------------
def _build_query(
school_name: Optional[str],
state_name: Optional[str],
district: Optional[str],
village: Optional[str] = None,
) -> str:
parts = ["UDISE code of"]
if school_name:
parts.append(f"School {school_name.strip()}")
if village:
parts.append(f"in village {village.strip()}")
if district:
parts.append(f"district {district.strip()}")
if state_name:
parts.append(f"state {state_name.strip()}")
return " ".join(parts).strip()
def _call_tavily(api_key: Optional[str], query: str):
key = api_key or os.getenv("TAVILY_API_KEY")
if not key:
return {"ok": False, "error": "No Tavily API key provided."}
if TavilyClient is None:
return {"ok": False, "error": "tavily package not installed."}
try:
client = TavilyClient(key)
print(query)
resp = client.search(query=query, country="india")
print(resp)
return {"ok": True, "data": resp}
except Exception as e:
return {"ok": False, "error": str(e)}
def _normalize_state_key(state_name: Optional[str]) -> Optional[str]:
    """Resolve a loosely written state name to its ``STATE_TO_UDISE_CODE`` key.

    Matching ignores case and every non-letter character. The standalone word
    "and" is also ignored, so "Jammu and Kashmir", "Jammu & Kashmir" and
    "Jammu Kashmir" all resolve to the same key.

    Args:
        state_name: State name as supplied by the caller; may be ``None``.

    Returns:
        The matching dictionary key, or ``None`` when ``state_name`` is empty
        or no key matches.
    """
    if not state_name:
        return None

    def _fold(name: str) -> str:
        # Drop the word "and" first (keys spell it "&", which the next step
        # removes anyway), then keep letters only and lower-case them.
        without_and = re.sub(r"\band\b", " ", name, flags=re.IGNORECASE)
        return re.sub(r"[^A-Za-z]", "", without_and).lower()

    wanted = _fold(state_name)
    for key in STATE_TO_UDISE_CODE:
        if _fold(key) == wanted:
            return key
    return None
# ----------------------------------------------------
# Public API
# ----------------------------------------------------
def tavily_search_codes(
    school_name: Optional[str],
    state_name: Optional[str] = None,
    district: Optional[str] = None,
    village: Optional[str] = None,
    api_key: Optional[str] = None,
    enforce_state_prefix: bool = True,
) -> List[str]:
    """Search the web via Tavily and return the unique UDISE codes found.

    Every 11-digit number in the result text is a candidate. A candidate is
    kept only if its first two digits are a known state prefix. When
    ``enforce_state_prefix`` is true and ``state_name`` resolves to a known
    state, it must also carry that state's prefix.

    Args:
        school_name: School to look up. Blank or missing returns ``[]``
            without performing a search.
        state_name: Optional state; used in the query and for prefix filtering.
        district: Optional district; used in the query only.
        village: Optional village; used only to make the query more precise.
        api_key: Tavily API key; falls back to the environment when omitted.
        enforce_state_prefix: Restrict results to the resolved state's prefix.

    Returns:
        UDISE codes in first-seen order, without duplicates. Empty when the
        search fails or nothing matches.
    """
    if not school_name or not school_name.strip():
        return []

    query = _build_query(school_name, state_name, district, village)
    call = _call_tavily(api_key, query)
    if not call.get("ok"):
        return []

    snippets = _response_snippets(call.get("data") or {})

    # Build the prefix set once rather than scanning .values() per match.
    known_prefixes = set(STATE_TO_UDISE_CODE.values())
    allowed_prefix = None
    if enforce_state_prefix:
        state_key = _normalize_state_key(state_name)
        if state_key:
            allowed_prefix = STATE_TO_UDISE_CODE.get(state_key)

    found, seen = [], set()
    for text in snippets:
        for match in _UDISE_RE.finditer(text):
            code = match.group(1)
            if code in seen:
                continue
            prefix = code[:2]
            if prefix not in known_prefixes:
                continue
            if allowed_prefix and prefix != allowed_prefix:
                continue
            seen.add(code)
            found.append(code)
    return found


def _response_snippets(raw) -> List[str]:
    """Flatten a search response into one text string per result item."""
    if isinstance(raw, list):
        return [str(entry) for entry in raw]
    if not isinstance(raw, dict):
        return [str(raw)]

    # Results may sit at the top level, under "data", or under "items".
    # Guard the nested lookup: "data" can be present but not a dict.
    nested = raw.get("data")
    candidates = (
        raw.get("results")
        or (nested.get("results") if isinstance(nested, dict) else None)
        or raw.get("items")
        or []
    )
    # A lone non-sequence payload is treated as a single result rather than
    # being iterated character-by-character or key-by-key.
    if not isinstance(candidates, (list, tuple)):
        candidates = [candidates]

    snippets = []
    for item in candidates:
        if isinstance(item, dict):
            snippets.append(
                " ".join(
                    str(item.get(field, ""))
                    for field in ("title", "content", "text", "url")
                )
            )
        else:
            snippets.append(str(item))
    return snippets
|