GodsDevProject committed on
Commit
021ecb3
·
verified ·
1 Parent(s): 65bc042

Create ingest/generic_public_foia.py

Browse files
Files changed (1) hide show
  1. ingest/generic_public_foia.py +10 -6
ingest/generic_public_foia.py CHANGED
@@ -1,14 +1,12 @@
1
  import abc
2
  import asyncio
 
3
  from typing import List, Dict
 
 
4
 
5
 
6
  class GenericFOIAAdapter(abc.ABC):
7
- """
8
- Base adapter for all public FOIA Electronic Reading Rooms.
9
- HF-safe: public-only, rate-limited, explicit stub/live labeling.
10
- """
11
-
12
  source_name: str = "UNKNOWN"
13
  base_url: str = ""
14
  source_type: str = "stub" # stub | live
@@ -16,6 +14,7 @@ class GenericFOIAAdapter(abc.ABC):
16
  def __init__(self, rate_limit_seconds: float = 1.0):
17
  self.rate_limit_seconds = rate_limit_seconds
18
  self._last_call = 0.0
 
19
 
20
  async def _rate_limit(self):
21
  now = asyncio.get_event_loop().time()
@@ -24,15 +23,20 @@ class GenericFOIAAdapter(abc.ABC):
24
  await asyncio.sleep(self.rate_limit_seconds - delta)
25
  self._last_call = asyncio.get_event_loop().time()
26
 
 
 
 
27
  async def _stub_result(self, query: str) -> List[Dict]:
28
  await self._rate_limit()
 
29
  return [{
30
  "source": self.source_name,
31
  "title": f"{self.source_name} FOIA result for '{query}'",
32
  "url": self.base_url,
33
  "snippet": "Public FOIA reading room placeholder result (stub).",
34
  "live": False,
35
- "extended": False
 
36
  }]
37
 
38
  @abc.abstractmethod
 
1
  import abc
2
  import asyncio
3
+ import time
4
  from typing import List, Dict
5
+ from ingest.robots import allowed
6
+ from ingest.health import HealthStatus
7
 
8
 
9
  class GenericFOIAAdapter(abc.ABC):
 
 
 
 
 
10
  source_name: str = "UNKNOWN"
11
  base_url: str = ""
12
  source_type: str = "stub" # stub | live
 
14
  def __init__(self, rate_limit_seconds: float = 1.0):
15
  self.rate_limit_seconds = rate_limit_seconds
16
  self._last_call = 0.0
17
+ self.last_health: HealthStatus | None = None
18
 
19
  async def _rate_limit(self):
20
  now = asyncio.get_event_loop().time()
 
23
  await asyncio.sleep(self.rate_limit_seconds - delta)
24
  self._last_call = asyncio.get_event_loop().time()
25
 
26
+ async def _guard(self, url: str) -> bool:
27
+ return await allowed(url)
28
+
29
  async def _stub_result(self, query: str) -> List[Dict]:
30
  await self._rate_limit()
31
+ self.last_health = HealthStatus(ok=True, latency_ms=0)
32
  return [{
33
  "source": self.source_name,
34
  "title": f"{self.source_name} FOIA result for '{query}'",
35
  "url": self.base_url,
36
  "snippet": "Public FOIA reading room placeholder result (stub).",
37
  "live": False,
38
+ "extended": False,
39
+ "health": self.last_health.__dict__
40
  }]
41
 
42
  @abc.abstractmethod