# FOIA_Doc_Search / ingest/extended_live_adapter.py
# GodsDevProject's picture
# Create ingest/extended_live_adapter.py
# 7cd4f4a verified
import aiohttp
import time
from ingest.generic_public_foia import GenericFOIAAdapter
from ingest.health import HealthStatus
class ExtendedLiveAdapter(GenericFOIAAdapter):
    """FOIA adapter that probes a live public search page for a query string.

    The adapter issues a single GET against a search URL and reports one
    synthetic result when the query text appears anywhere in the response
    body. The outcome of every probe is recorded on ``self.last_health``.
    """

    source_type = "live"
    extended_only = True

    # Total time budget (seconds) for one request, including the body read.
    _REQUEST_TIMEOUT_SECONDS = 12
    # User-Agent header sent with every probe.
    _USER_AGENT = "HF-FOIA-Search/1.0"

    async def _safe_search(self, search_url: str, query: str) -> list:
        """Fetch ``search_url`` and report whether the body mentions ``query``.

        Never raises for network or decoding problems: any exception raised
        while fetching or inspecting the response is recorded on
        ``self.last_health`` and an empty list is returned.

        Args:
            search_url: URL to request; ``query`` is sent as the ``q``
                query-string parameter.
            query: Text to look for (case-insensitively) in the response body.

        Returns:
            A one-element list holding a result dict when the body contains
            ``query``; otherwise an empty list. Blank queries, refused URLs,
            HTTP error statuses and request failures all yield ``[]``.
        """
        start = time.perf_counter()

        # NOTE(review): _rate_limit and _guard are not defined in this class;
        # presumably inherited from GenericFOIAAdapter. The "robots.txt" error
        # text suggests _guard is a robots.txt permission check -- confirm.
        await self._rate_limit()
        if not await self._guard(search_url):
            self.last_health = HealthStatus(ok=False, latency_ms=0, error="robots.txt")
            return []

        try:
            # A bare number for ``timeout`` is deprecated by aiohttp; use the
            # explicit ClientTimeout object instead.
            timeout = aiohttp.ClientTimeout(total=self._REQUEST_TIMEOUT_SECONDS)
            async with aiohttp.ClientSession() as session:
                async with session.get(
                    search_url,
                    params={"q": query},
                    timeout=timeout,
                    headers={"User-Agent": self._USER_AGENT},
                ) as resp:
                    status = resp.status
                    # Skip the body of an error response: an error page that
                    # echoes the query must not be reported as a document hit.
                    # Decode leniently so a stray byte in an otherwise
                    # readable page does not mark the source as failed.
                    text = "" if status >= 400 else await resp.text(errors="replace")

            latency = int((time.perf_counter() - start) * 1000)

            if status >= 400:
                self.last_health = HealthStatus(
                    ok=False, latency_ms=latency, error=f"HTTP {status}"
                )
                return []

            self.last_health = HealthStatus(ok=True, latency_ms=latency)

            needle = query.lower()
            # An empty string is a substring of every body, so a blank query
            # would otherwise always produce a hit.
            if not needle.strip() or needle not in text.lower():
                return []

            return [{
                "source": self.source_name,
                "title": f"{self.source_name} public document mentioning '{query}'",
                "url": search_url,
                "snippet": "Public FOIA-released material (extended live mode).",
                "live": True,
                "extended": True,
                # Copy so callers editing the result cannot alter last_health.
                "health": dict(vars(self.last_health)),
            }]
        except Exception as e:
            # Deliberate best-effort boundary: a failing source must not break
            # the caller, so the failure is recorded instead of raised.
            latency = int((time.perf_counter() - start) * 1000)
            # Some exceptions (e.g. asyncio.TimeoutError) have an empty
            # message; fall back to the class name so the error is never blank.
            self.last_health = HealthStatus(
                ok=False, latency_ms=latency, error=str(e) or type(e).__name__
            )
            return []