| import json
|
| from pathlib import Path
|
| from typing import Any, Dict, List, Optional
|
|
|
| import httpx
|
| from pydantic import BaseModel
|
|
|
|
|
class InstitutionProfile(BaseModel):
    """Normalized institution evidence returned by the graph layer.

    Every field has a default, so a profile can be built from whatever
    subset of information a lookup managed to resolve.
    """

    # --- Identity -------------------------------------------------------
    # Display name; the default doubles as the "nothing resolved" marker.
    name: str = "Unknown Institution"
    # Registry identifier for the organization, when one is known.
    ror_id: Optional[str] = None
    # Country name, when one is known.
    country: Optional[str] = None

    # --- Registry facts -------------------------------------------------
    # Status string for the organization; "unknown" until a lookup sets it.
    status: str = "unknown"
    # Founding year, when one is known.
    established_year: Optional[int] = None
    # Numeric reputation value; 0.0 until a lookup assigns one.
    reputation_score: float = 0.0

    # --- Provenance of the match ----------------------------------------
    # Label for where the data came from; "none" means no source answered.
    source: str = "none"
    # Confidence of the name match; 0.0 until a lookup assigns one.
    match_confidence: float = 0.0

    # --- Flags ----------------------------------------------------------
    # Diploma-mill flag; defaults to False.
    is_diploma_mill: bool = False
    # Free-text warning for downstream consumers; empty when there is none.
    warning: str = ""
|
|
|
|
|
class GraphNavigator:
    """Resolve institution names against a local JSON index and the ROR API.

    The local index is read once at construction time. Lookups that miss
    the index fall back to an HTTP query against ``api_url``.
    """

    def __init__(
        self,
        local_index_path: str = "data/global_academic_full_index_v2.json",
        api_url: str = "https://api.ror.org/organizations",
        timeout: float = 10.0,
    ):
        """Load the local index and remember the remote endpoint settings.

        Args:
            local_index_path: Path of the JSON file holding a list of
                institution records.
            api_url: Endpoint queried when the local index has no match.
            timeout: Timeout, in seconds, passed to the HTTP client.
        """
        self.local_index_path = Path(local_index_path)
        # NOTE(review): the parsing helpers below read ``names`` and
        # ``locations``; confirm this endpoint serves that schema.
        self.api_url = api_url
        self.timeout = timeout
        self.local_cache = self._load_local_cache()

    def _load_local_cache(self) -> List[Dict[str, Any]]:
        """Read the local index file and return its dict records.

        Returns:
            The dict entries of the top-level JSON list, or an empty list
            when the file is missing, unreadable, not valid UTF-8 JSON, or
            does not contain a list.
        """
        try:
            with self.local_index_path.open("r", encoding="utf-8") as handle:
                data = json.load(handle)
        except (OSError, ValueError):
            # ValueError covers both json.JSONDecodeError and the
            # UnicodeDecodeError raised for files that are not UTF-8.
            return []

        if not isinstance(data, list):
            return []
        # Drop stray non-dict entries so later ``.get`` calls cannot fail.
        return [entry for entry in data if isinstance(entry, dict)]

    @staticmethod
    def _normalize(value: Optional[str]) -> str:
        """Return ``value`` lower-cased, with ``&`` spelled out and
        whitespace runs collapsed; non-string input yields ``""``."""
        if not isinstance(value, str):
            return ""
        return " ".join(value.lower().replace("&", "and").split())

    @staticmethod
    def _is_usable_query(query: str) -> bool:
        """Tell whether a normalized name is worth looking up at all."""
        return len(query) >= 3 and query != "unknown institution"

    def _local_match(self, name: str) -> Optional["InstitutionProfile"]:
        """Find an exact (normalized) name match in the local index.

        Args:
            name: Institution name as extracted from the credential.

        Returns:
            A profile built from the first matching record, or ``None``
            when the name is unusable or no record matches.
        """
        query = self._normalize(name)
        if not self._is_usable_query(query):
            return None

        for item in self.local_cache:
            if not isinstance(item, dict):
                continue
            item_name = item.get("name")
            if self._normalize(item_name) != query:
                continue

            # A key that is present but null must fall back to the same
            # default as a missing key instead of breaking the conversion.
            status = item.get("status")
            reputation = item.get("reputation_score")
            return InstitutionProfile(
                name=item_name,
                ror_id=item.get("ror_id"),
                country=item.get("country"),
                status="active" if status is None else status,
                established_year=item.get("established_year") or item.get("established"),
                reputation_score=5.0 if reputation is None else float(reputation),
                source="local_index",
                match_confidence=1.0,
            )

        return None

    @staticmethod
    def _extract_ror_name(item: Dict[str, Any]) -> str:
        """Pick a display name from a ROR record.

        Prefers the entry whose ``types`` contains ``"ror_display"``, then
        the first entry, then the placeholder ``"Unknown Institution"``.
        """
        raw_names = item.get("names")
        names = (
            [entry for entry in raw_names if isinstance(entry, dict)]
            if isinstance(raw_names, list)
            else []
        )
        for candidate in names:
            if "ror_display" in (candidate.get("types") or []):
                return candidate.get("value") or "Unknown Institution"
        if names:
            return names[0].get("value") or "Unknown Institution"
        return "Unknown Institution"

    @staticmethod
    def _extract_country(item: Dict[str, Any]) -> Optional[str]:
        """Return the country name of a ROR record's first location, if any."""
        locations = item.get("locations")
        if not isinstance(locations, list) or not locations:
            return None
        first = locations[0]
        if not isinstance(first, dict):
            return None
        # ``geonames_details`` may be present but null.
        details = first.get("geonames_details") or {}
        return details.get("country_name") if isinstance(details, dict) else None

    async def navigate(self, name: str) -> "InstitutionProfile":
        """
        Resolve an institution name against the local index and ROR.

        A ROR match is evidence that an organization exists, not proof that an
        uploaded credential is authentic. The logic layer must still evaluate
        document evidence and registry consistency before issuing a verdict.

        Args:
            name: Institution name as extracted from the credential.

        Returns:
            A profile from the local index when it has an exact match;
            otherwise a profile built from the first ROR result, or a
            profile carrying a ``warning`` when the name is unusable, the
            lookup fails, or ROR returns nothing.
        """
        query = self._normalize(name)
        if not self._is_usable_query(query):
            return InstitutionProfile(
                name=name if isinstance(name, str) else "Unknown Institution",
                warning="No institution name could be resolved from the credential.",
            )

        local = self._local_match(name)
        if local is not None:
            return local

        try:
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                response = await client.get(self.api_url, params={"query": name})
                response.raise_for_status()
                payload = response.json()
        except (httpx.HTTPError, ValueError) as exc:
            # ValueError includes json.JSONDecodeError from a non-JSON body.
            return InstitutionProfile(
                name=name,
                status="unverified",
                source="ror_unavailable",
                warning=f"ROR lookup failed: {exc.__class__.__name__}",
            )

        # Tolerate a body that is valid JSON but not the expected object.
        items = payload.get("items") if isinstance(payload, dict) else None
        results = (
            [entry for entry in items if isinstance(entry, dict)]
            if isinstance(items, list)
            else []
        )
        if not results:
            return InstitutionProfile(
                name=name,
                status="unverified",
                source="ror",
                warning="Institution not found in ROR results.",
            )

        top_result = results[0]
        # NOTE(review): ``score`` may be absent from ``query`` responses, in
        # which case match_confidence stays 0.0 — confirm against ROR docs.
        return InstitutionProfile(
            name=self._extract_ror_name(top_result),
            ror_id=top_result.get("id"),
            country=self._extract_country(top_result),
            status=top_result.get("status") or "unknown",
            source="ror",
            match_confidence=float(top_result.get("score") or 0.0),
            reputation_score=5.0,
        )

    async def verify_institution(self, name: str) -> "InstitutionProfile":
        """Backward-compatible wrapper for older examples."""
        return await self.navigate(name)
|
|
|
|
|
if __name__ == "__main__":
    import asyncio

    async def main():
        """Look up one sample institution and print the resulting profile."""
        navigator = GraphNavigator()
        profile = await navigator.verify_institution("University of Balamand")
        print(profile.model_dump())

    # Script entry point: drive the coroutine to completion.
    asyncio.run(main())
|
|
|