GodsDevProject committed on
Commit
dd04378
·
verified ·
1 Parent(s): 122df81

Create ingest/adapters/fbi.py

Browse files
Files changed (1) hide show
  1. ingest/adapters/fbi.py +12 -8
ingest/adapters/fbi.py CHANGED
@@ -4,11 +4,14 @@ from ingest.generic_public_foia import GenericFOIAAdapter
4
 
5
  class FBIAdapter(GenericFOIAAdapter):
6
  source_name = "FBI Vault"
7
- agency = "FBI"
8
  base_url = "https://vault.fbi.gov/search"
9
 
10
  async def search(self, query: str):
 
 
 
11
  await self._rate_limit()
 
12
  params = {"SearchableText": query}
13
 
14
  async with aiohttp.ClientSession() as session:
@@ -18,17 +21,18 @@ class FBIAdapter(GenericFOIAAdapter):
18
  soup = BeautifulSoup(html, "html.parser")
19
  results = []
20
 
21
- for r in soup.select(".item"):
22
- title_el = r.select_one("a")
23
- if not title_el:
 
 
24
  continue
25
 
26
  results.append({
27
  "source": self.source_name,
28
- "agency": self.agency,
29
- "title": title_el.get_text(strip=True),
30
- "url": title_el["href"],
31
- "snippet": ""
32
  })
33
 
34
  return results
 
4
 
5
  class FBIAdapter(GenericFOIAAdapter):
6
  source_name = "FBI Vault"
 
7
  base_url = "https://vault.fbi.gov/search"
8
 
9
  async def search(self, query: str):
10
+ if not self.robots_allowed():
11
+ return []
12
+
13
  await self._rate_limit()
14
+
15
  params = {"SearchableText": query}
16
 
17
  async with aiohttp.ClientSession() as session:
 
21
  soup = BeautifulSoup(html, "html.parser")
22
  results = []
23
 
24
+ for item in soup.select(".document"):
25
+ title = item.select_one("h3")
26
+ link = item.select_one("a")
27
+
28
+ if not title or not link:
29
  continue
30
 
31
  results.append({
32
  "source": self.source_name,
33
+ "title": title.text.strip(),
34
+ "url": link["href"],
35
+ "snippet": title.text.strip()
 
36
  })
37
 
38
  return results