GodsDevProject committed on
Commit
5261438
·
verified ·
1 Parent(s): f680dc4

Create ingest/adapters/fbi.py

Browse files
Files changed (1) hide show
  1. ingest/adapters/fbi.py +32 -9
ingest/adapters/fbi.py CHANGED
@@ -1,11 +1,34 @@
1
- from ingest.generic_public_foia import PublicFOIAAdapter
 
 
2
 
3
class FBIAdapter(PublicFOIAAdapter):
    """Adapter for the FBI FOIA Reading Room hosted at vault.fbi.gov."""

    source_name = "FBI FOIA Reading Room"
    base_url = "https://vault.fbi.gov"

    async def search(self, query):
        """Run *query* against the site's ``/search`` path.

        The lookup is delegated to ``fetch_public_results`` (presumably
        provided by ``PublicFOIAAdapter`` -- confirm against the base class)
        and its result is returned unchanged.
        """
        found = await self.fetch_public_results(query=query, search_path="/search")
        return found
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import aiohttp
2
+ from bs4 import BeautifulSoup
3
+ from ingest.generic_public_foia import GenericFOIAAdapter
4
 
5
class FBIAdapter(GenericFOIAAdapter):
    """Search adapter that scrapes result links from the FBI Vault search page.

    Every hit is returned as a plain dict with the keys ``source``,
    ``agency``, ``title``, ``url`` and ``snippet``.
    """

    source_name = "FBI Vault"
    agency = "FBI"
    # Full URL of the search endpoint; the query is sent as a GET parameter.
    base_url = "https://vault.fbi.gov/search"

    async def search(self, query: str) -> list:
        """Fetch the search page for *query* and extract the result links.

        Args:
            query: Text sent as the ``SearchableText`` query parameter.

        Returns:
            A list with one dict per ``.item`` element that contains a link
            carrying an ``href``; elements without one are skipped. ``url``
            is resolved against ``base_url`` and ``snippet`` is always an
            empty string.
        """
        # Imported locally so the class does not depend on a module-level
        # import that is outside this block.
        from urllib.parse import urljoin

        # presumably a throttling hook inherited from GenericFOIAAdapter --
        # TODO confirm against the base class.
        await self._rate_limit()
        params = {"SearchableText": query}

        # NOTE(review): the HTTP status is not checked, so an error page is
        # parsed like a result page -- confirm whether callers would rather
        # get an exception here.
        async with aiohttp.ClientSession() as session:
            async with session.get(self.base_url, params=params) as resp:
                html = await resp.text()

        soup = BeautifulSoup(html, "html.parser")
        results = []

        for item in soup.select(".item"):
            link = item.select_one("a")
            if link is None:
                continue

            # An anchor without an href cannot be turned into a usable URL;
            # skip it instead of raising KeyError and losing every result.
            href = link.get("href")
            if not href:
                continue

            results.append({
                "source": self.source_name,
                "agency": self.agency,
                "title": link.get_text(strip=True),
                # urljoin leaves absolute links untouched and resolves
                # relative ones against the search URL.
                "url": urljoin(self.base_url, href),
                "snippet": "",
            })

        return results