GodsDevProject committed on
Commit
9787c1b
·
verified ·
1 Parent(s): 79acc4d

Create ingest/generic_public_foia.py

Browse files
Files changed (1) hide show
  1. ingest/generic_public_foia.py +36 -28
ingest/generic_public_foia.py CHANGED
@@ -1,29 +1,37 @@
1
  import time
2
- import asyncio
3
- from typing import List, Dict
4
-
5
-
6
class GenericFOIAAdapter:
    """
    Base class for public-only FOIA adapters.
    Non-abstract to avoid HF instantiation errors.

    Subclasses are expected to override the class attributes below and the
    ``search`` coroutine; the base implementation never returns results.
    """

    # Human-readable name of the FOIA source this adapter talks to.
    source_name: str = "UNKNOWN"
    # Endpoint the adapter queries; empty for the base class.
    base_url: str = ""
    # True while the adapter performs no real lookup.
    is_stub: bool = True

    def __init__(self, rate_limit: float = 1.0):
        """Set up throttling state.

        Args:
            rate_limit: Minimum number of seconds enforced between two
                consecutive ``_throttle`` completions.
        """
        self.rate_limit = rate_limit
        # Monotonic timestamp of the previous throttled call. Starting at
        # -inf guarantees the very first call is never delayed, whatever
        # the monotonic clock's (unspecified) reference point is.
        self._last_call = float("-inf")
        self.last_latency = 0.0

    async def _throttle(self):
        """Sleep just long enough to keep calls ``rate_limit`` seconds apart.

        A monotonic clock is used instead of wall-clock time so that system
        clock adjustments can neither stall the adapter nor let it exceed
        the configured rate.
        """
        elapsed = time.monotonic() - self._last_call
        if elapsed < self.rate_limit:
            await asyncio.sleep(self.rate_limit - elapsed)
        self._last_call = time.monotonic()

    async def search(self, query: str) -> List[Dict]:
        """Return the records matching ``query``.

        The base adapter performs no lookup and always yields an empty list.
        """
        # Safe default: no results
        return []
 
 
 
 
 
 
 
 
 
1
  import time
2
+ import aiohttp
3
+ from ingest.generic_public_foia import GenericFOIAAdapter
4
+
5
+
6
class CIAAdapter(GenericFOIAAdapter):
    """Adapter that queries the public CIA FOIA Reading Room search page.

    The adapter stays inert unless it is constructed with ``live=True``.
    When live, ``search`` issues a single throttled GET request; the
    response body is downloaded but not parsed yet, and one placeholder
    record is returned in its place.
    """

    source_name = "CIA FOIA Reading Room"
    base_url = "https://www.cia.gov/readingroom/search/site"
    is_stub = False

    def __init__(self, live: bool = False):
        """Create the adapter.

        Args:
            live: When False (the default) ``search`` never touches the
                network and always returns an empty list.
        """
        super().__init__(rate_limit=2.0)
        self.live = live

    async def search(self, query: str) -> list:
        """Search the reading room for ``query``.

        Args:
            query: Full-text search string, sent as the
                ``search_api_fulltext`` query parameter.

        Returns:
            An empty list when the adapter is not live or the server
            answers with an HTTP error status; otherwise a one-element
            list holding a placeholder record with ``title``, ``url`` and
            ``snippet`` keys.

        Raises:
            Whatever the aiohttp request raises (connection errors,
            timeouts); these are not caught here.
        """
        if not self.live:
            return []

        await self._throttle()
        started = time.perf_counter()

        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(
                    self.base_url,
                    params={"search_api_fulltext": query},
                    # A bare number is the deprecated spelling; ClientTimeout
                    # states explicitly that 20 s bounds the whole request.
                    timeout=aiohttp.ClientTimeout(total=20),
                ) as resp:
                    request_ok = resp.status < 400
                    await resp.text()
        finally:
            # Record the latency even when the request fails, so the value
            # never silently refers to an older call.
            self.last_latency = time.perf_counter() - started

        # Do not fabricate a hit when the server reported an error.
        if not request_ok:
            return []

        # Public-safe placeholder parse
        return [{
            "title": f"CIA FOIA document mentioning '{query}'",
            "url": self.base_url,
            "snippet": "Publicly released FOIA document."
        }]