GodsDevProject committed on
Commit
8752aaa
·
verified ·
1 Parent(s): 7cd4f4a

Create ingest/fbi_vault.py

Browse files
Files changed (1) hide show
  1. ingest/fbi_vault.py +35 -26
ingest/fbi_vault.py CHANGED
@@ -1,35 +1,44 @@
1
  import aiohttp
2
- from ingest.base_adapter import BaseAdapter
 
 
3
 
4
class FBIAdapter(BaseAdapter):
    """Adapter that runs a keyword search against the FBI Vault site.

    This is the pre-commit version of the adapter (the side of the diff
    that was removed), reconstructed as plain Python.
    """

    # Label copied into the "source" field of every result.
    name = "FBI Vault"
    # NOTE(review): presumably read by BaseAdapter or its callers - confirm.
    source_type = "live"
    base_url = "https://vault.fbi.gov"

    async def search(self, query: str):
        """Fetch the Vault search page for *query* and return result dicts.

        One placeholder result is produced for every line of the response
        body that contains "/reading-room/". An empty list is returned when
        the response status is not 200.
        """
        endpoint = f"{self.base_url}/search"

        async with aiohttp.ClientSession() as session, session.get(
            endpoint,
            params={"q": query},
            timeout=15,
        ) as resp:
            if resp.status != 200:
                return []
            body = await resp.text()

        # Minimal, safe parsing (no deep scrape): one entry per matching line.
        return [
            {
                "title": f"FBI Vault result for {query}",
                "url": self.base_url,
                "snippet": "Public FBI Vault FOIA document",
                "source": self.name,
                "live": True,
            }
            for row in body.splitlines()
            if "/reading-room/" in row
        ]
 
 
 
 
1
  import aiohttp
2
+ import time
3
+ from ingest.generic_public_foia import GenericFOIAAdapter
4
+ from ingest.health import HealthStatus
5
 
6
class FBIAdapter(GenericFOIAAdapter):
    """Adapter that runs a keyword search against the FBI Vault site.

    Every call to :meth:`search` stores a ``HealthStatus`` describing the
    outcome of that call in ``self.last_health``.
    """

    # Label copied into the "source" field of every result.
    source_name = "FBI Vault"
    base_url = "https://vault.fbi.gov"
    # NOTE(review): presumably read by the base class or its callers - confirm.
    source_type = "live"

    async def search(self, query: str):
        """Query the Vault search page and return at most one result dict.

        Args:
            query: Free-text search term, sent as the ``q`` parameter.

        Returns:
            A one-element list holding a summary result when the response is
            HTTP 200 and its body mentions "reading-room" (case-insensitive);
            otherwise an empty list. Failures never raise: they are recorded
            in ``self.last_health`` and an empty list is returned.
        """
        # Function-scope import so the block carries its own dependency.
        from urllib.parse import urlencode

        start = time.perf_counter()
        await self._rate_limit()

        url = f"{self.base_url}/search"
        # NOTE(review): _guard looks like a robots.txt permission check, judging
        # by the error string recorded below - confirm against the base class.
        if not await self._guard(url):
            self.last_health = HealthStatus(ok=False, latency_ms=0, error="robots.txt")
            return []

        # Deliberately broad best-effort boundary: any failure while fetching
        # or building the result is reported through last_health, not raised.
        try:
            # aiohttp expects a ClientTimeout object; bare numbers are deprecated.
            timeout = aiohttp.ClientTimeout(total=12)
            async with aiohttp.ClientSession() as session:
                async with session.get(url, params={"q": query}, timeout=timeout) as resp:
                    status = resp.status
                    text = await resp.text()
            latency = int((time.perf_counter() - start) * 1000)

            # An error page can still contain "reading-room" in its site
            # navigation, so a non-200 response must not count as a hit or
            # as a healthy source.
            if status != 200:
                self.last_health = HealthStatus(
                    ok=False, latency_ms=latency, error=f"HTTP {status}"
                )
                return []

            self.last_health = HealthStatus(ok=True, latency_ms=latency)
            if "reading-room" not in text.lower():
                return []

            return [{
                "source": self.source_name,
                "title": f"FBI Vault document mentioning '{query}'",
                # Percent-encode the query so spaces, '&', '#', etc. yield a
                # working link instead of a truncated or malformed one.
                "url": f"{url}?{urlencode({'q': query})}",
                "snippet": "Public FBI FOIA document from The Vault.",
                "live": True,
                "extended": False,
                # Copy so callers mutating the result cannot alter last_health.
                "health": dict(vars(self.last_health)),
            }]

        except Exception as e:
            latency = int((time.perf_counter() - start) * 1000)
            self.last_health = HealthStatus(ok=False, latency_ms=latency, error=str(e))
            return []