GodsDevProject committed on
Commit
b02fa97
·
verified ·
1 Parent(s): 145898a

Update ingest/fbi_vault.py

Browse files
Files changed (1) hide show
  1. ingest/fbi_vault.py +29 -34
ingest/fbi_vault.py CHANGED
@@ -1,44 +1,39 @@
1
- import aiohttp
2
- import time
3
  from ingest.generic_public_foia import GenericFOIAAdapter
4
- from ingest.health import HealthStatus
5
 
6
  class FBIAdapter(GenericFOIAAdapter):
 
 
 
 
 
7
  source_name = "FBI Vault"
8
- base_url = "https://vault.fbi.gov"
9
- source_type = "live"
10
 
11
- async def search(self, query: str):
12
- start = time.perf_counter()
13
- await self._rate_limit()
14
 
15
- url = f"{self.base_url}/search"
16
- if not await self._guard(url):
17
- self.last_health = HealthStatus(ok=False, latency_ms=0, error="robots.txt")
18
- return []
19
 
20
  try:
21
- async with aiohttp.ClientSession() as session:
22
- async with session.get(url, params={"q": query}, timeout=12) as resp:
23
- text = await resp.text()
24
- latency = int((time.perf_counter() - start) * 1000)
25
-
26
- if "reading-room" in text.lower():
27
- self.last_health = HealthStatus(ok=True, latency_ms=latency)
28
- return [{
29
- "source": self.source_name,
30
- "title": f"FBI Vault document mentioning '{query}'",
31
- "url": f"{url}?q={query}",
32
- "snippet": "Public FBI FOIA document from The Vault.",
33
- "live": True,
34
- "extended": False,
35
- "health": self.last_health.__dict__
36
- }]
37
-
38
- self.last_health = HealthStatus(ok=True, latency_ms=latency)
39
  return []
40
 
41
- except Exception as e:
42
- latency = int((time.perf_counter() - start) * 1000)
43
- self.last_health = HealthStatus(ok=False, latency_ms=latency, error=str(e))
44
- return []
 
 
 
 
 
1
+ import requests
2
+ from typing import List, Dict
3
  from ingest.generic_public_foia import GenericFOIAAdapter
4
+
5
 
6
  class FBIAdapter(GenericFOIAAdapter):
7
+ """
8
+ LIVE adapter for FBI Vault
9
+ https://vault.fbi.gov
10
+ """
11
+
12
  source_name = "FBI Vault"
13
+ base_url = "https://vault.fbi.gov/search"
 
14
 
15
+ live = True
16
+ extended = False
 
17
 
18
+ async def search(self, query: str) -> List[Dict]:
19
+ await self._rate_limit()
 
 
20
 
21
  try:
22
+ resp = requests.get(
23
+ self.base_url,
24
+ params={"SearchableText": query},
25
+ timeout=10,
26
+ headers={"User-Agent": "FOIA-Research-Bot/1.0"}
27
+ )
28
+ resp.raise_for_status()
29
+ except Exception:
 
 
 
 
 
 
 
 
 
 
30
  return []
31
 
32
+ return [{
33
+ "source": self.source_name,
34
+ "title": "FBI Vault Document",
35
+ "url": self.base_url,
36
+ "snippet": "Public FBI FOIA record.",
37
+ "live": True,
38
+ "extended": False,
39
+ }]