GodsDevProject committed on
Commit
7cd4f4a
·
verified ·
1 Parent(s): 021ecb3

Create ingest/extended_live_adapter.py

Browse files
Files changed (1) hide show
  1. ingest/extended_live_adapter.py +17 -12
ingest/extended_live_adapter.py CHANGED
@@ -1,20 +1,20 @@
1
  import aiohttp
 
2
  from ingest.generic_public_foia import GenericFOIAAdapter
3
-
4
 
5
  class ExtendedLiveAdapter(GenericFOIAAdapter):
6
- """
7
- Extended live adapter.
8
- Activated only when Extended Features mode is enabled.
9
- Uses conservative keyword-presence search on public FOIA pages.
10
- """
11
-
12
  source_type = "live"
13
  extended_only = True
14
 
15
  async def _safe_search(self, search_url: str, query: str):
 
16
  await self._rate_limit()
17
 
 
 
 
 
18
  try:
19
  async with aiohttp.ClientSession() as session:
20
  async with session.get(
@@ -23,20 +23,25 @@ class ExtendedLiveAdapter(GenericFOIAAdapter):
23
  timeout=12,
24
  headers={"User-Agent": "HF-FOIA-Search/1.0"}
25
  ) as resp:
26
- if resp.status != 200:
27
- return []
28
  text = await resp.text()
 
29
 
30
  if query.lower() in text.lower():
 
31
  return [{
32
  "source": self.source_name,
33
  "title": f"{self.source_name} public document mentioning '{query}'",
34
  "url": search_url,
35
  "snippet": "Public FOIA-released material (extended live mode).",
36
  "live": True,
37
- "extended": True
 
38
  }]
39
- except Exception:
 
40
  return []
41
 
42
- return []
 
 
 
 
1
  import aiohttp
2
+ import time
3
  from ingest.generic_public_foia import GenericFOIAAdapter
4
+ from ingest.health import HealthStatus
5
 
6
  class ExtendedLiveAdapter(GenericFOIAAdapter):
 
 
 
 
 
 
7
  source_type = "live"
8
  extended_only = True
9
 
10
  async def _safe_search(self, search_url: str, query: str):
11
+ start = time.perf_counter()
12
  await self._rate_limit()
13
 
14
+ if not await self._guard(search_url):
15
+ self.last_health = HealthStatus(ok=False, latency_ms=0, error="robots.txt")
16
+ return []
17
+
18
  try:
19
  async with aiohttp.ClientSession() as session:
20
  async with session.get(
 
23
  timeout=12,
24
  headers={"User-Agent": "HF-FOIA-Search/1.0"}
25
  ) as resp:
 
 
26
  text = await resp.text()
27
+ latency = int((time.perf_counter() - start) * 1000)
28
 
29
  if query.lower() in text.lower():
30
+ self.last_health = HealthStatus(ok=True, latency_ms=latency)
31
  return [{
32
  "source": self.source_name,
33
  "title": f"{self.source_name} public document mentioning '{query}'",
34
  "url": search_url,
35
  "snippet": "Public FOIA-released material (extended live mode).",
36
  "live": True,
37
+ "extended": True,
38
+ "health": self.last_health.__dict__
39
  }]
40
+
41
+ self.last_health = HealthStatus(ok=True, latency_ms=latency)
42
  return []
43
 
44
+ except Exception as e:
45
+ latency = int((time.perf_counter() - start) * 1000)
46
+ self.last_health = HealthStatus(ok=False, latency_ms=latency, error=str(e))
47
+ return []