GodsDevProject committed on
Commit
8085ea8
·
verified ·
1 Parent(s): ee29427

Update ingest/generic_public_foia.py

Browse files
Files changed (1) hide show
  1. ingest/generic_public_foia.py +46 -7
ingest/generic_public_foia.py CHANGED
@@ -2,37 +2,76 @@
2
 
3
  import abc
4
  import asyncio
5
- import requests
6
  from typing import List, Dict
7
 
8
 
9
class GenericFOIAAdapter(abc.ABC):
    """
    Base adapter for all public FOIA Electronic Reading Rooms.

    Provides per-adapter rate limiting via ``_rate_limit``. Subclasses are
    expected to restrict themselves to public-only access and safe defaults.
    """

    # Human-readable name of the source; subclasses override.
    source_name: str = "UNKNOWN"
    # Base URL of the reading room; subclasses override.
    base_url: str = ""

    def __init__(self, rate_limit_seconds: float = 1.0):
        """
        Args:
            rate_limit_seconds: Minimum spacing, in seconds, that
                ``_rate_limit`` keeps between consecutive calls.
        """
        self.rate_limit_seconds = rate_limit_seconds
        # Event-loop timestamp of the most recent rate-limited call.
        self._last_call = 0.0

    async def _rate_limit(self) -> None:
        """
        Sleep as needed so calls are at least ``rate_limit_seconds`` apart.
        """
        # get_running_loop() is the preferred accessor inside a coroutine.
        loop = asyncio.get_running_loop()
        delta = loop.time() - self._last_call
        if delta < self.rate_limit_seconds:
            await asyncio.sleep(self.rate_limit_seconds - delta)
        # Read the clock again: time may have elapsed while sleeping.
        self._last_call = loop.time()

    @abc.abstractmethod
    async def search(self, query: str) -> List[Dict]:
        """
        Perform a public FOIA search.

        Must return a list of dicts with:
        - source
        - title
        - url
        - snippet
        """
        raise NotImplementedError
 
2
 
3
  import abc
4
  import asyncio
 
5
  from typing import List, Dict
6
 
7
 
8
class GenericFOIAAdapter(abc.ABC):
    """
    Base adapter for all public FOIA Electronic Reading Rooms.

    Intended to be safe to run on Hugging Face Spaces. Subclasses are
    expected to honor this contract:
    - Public-only access
    - Rate-limited (see ``_rate_limit``)
    - No authentication
    - Explicit stub/live disclosure (see ``source_type`` / ``is_live``)
    """

    # Human-readable source name (shown in UI)
    source_name: str = "UNKNOWN"

    # Base public reading room URL
    base_url: str = ""

    # Either "stub" or "live"
    source_type: str = "stub"

    def __init__(self, rate_limit_seconds: float = 1.0):
        """
        Args:
            rate_limit_seconds: Minimum spacing, in seconds, that
                ``_rate_limit`` keeps between consecutive calls.
        """
        self.rate_limit_seconds = rate_limit_seconds
        # Event-loop timestamp of the most recent rate-limited call.
        self._last_call = 0.0

    async def _rate_limit(self) -> None:
        """
        Enforce per-adapter rate limiting (HF-safe).

        Sleeps as needed so that calls on this adapter are at least
        ``rate_limit_seconds`` apart, then records the current loop time.
        """
        # get_running_loop() is the preferred accessor inside a coroutine.
        loop = asyncio.get_running_loop()
        delta = loop.time() - self._last_call
        if delta < self.rate_limit_seconds:
            await asyncio.sleep(self.rate_limit_seconds - delta)
        # Read the clock again: time may have elapsed while sleeping.
        self._last_call = loop.time()

    @property
    def is_live(self) -> bool:
        """
        Convenience flag used by UI and filters.

        True only when ``source_type`` is exactly ``"live"``.
        """
        return self.source_type == "live"

    async def _stub_result(self, query: str) -> List[Dict]:
        """
        Default placeholder result for stub adapters.

        Applies the rate limit, then returns a single-item list that uses
        the same keys ``search`` is required to return, with ``live`` set
        to False.
        """
        await self._rate_limit()
        return [{
            "source": self.source_name,
            "title": f"{self.source_name} FOIA result for '{query}'",
            "url": self.base_url,
            "snippet": "Public FOIA reading room placeholder result (stub).",
            "live": False,
        }]

    @abc.abstractmethod
    async def search(self, query: str) -> List[Dict]:
        """
        Perform a public FOIA search.

        Must return a list of dicts with:
        - source (str)
        - title (str)
        - url (str)
        - snippet (str)
        - live (bool)

        Stub adapters should normally call self._stub_result(query).
        Live adapters should set live=True.
        """
        raise NotImplementedError