Spaces:
Sleeping
Sleeping
Create ingest/adapters/public_agencies.py
Browse files
ingest/adapters/public_agencies.py
CHANGED
|
@@ -1,113 +1,49 @@
|
|
| 1 |
import aiohttp
|
| 2 |
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 3 |
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
base_url = "https://www.cia.gov/readingroom"
|
| 8 |
-
is_live = True
|
| 9 |
-
|
| 10 |
-
async def search(self, query):
|
| 11 |
-
if not self.allowed("/search"):
|
| 12 |
-
return []
|
| 13 |
-
await self._rate_limit()
|
| 14 |
-
url = f"{self.base_url}/search/site/{query}"
|
| 15 |
-
return [{
|
| 16 |
-
"source": self.source_name,
|
| 17 |
-
"title": f"CIA document mentioning {query}",
|
| 18 |
-
"url": url,
|
| 19 |
-
"snippet": "Public CIA FOIA document",
|
| 20 |
-
"agency": "CIA"
|
| 21 |
-
}]
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
class FBIAdapter(GenericFOIAAdapter):
    """Adapter that produces FBI Vault search links for a query.

    ``search`` issues no HTTP request in this class; it returns one
    placeholder record whose URL points at the Vault search page.
    """

    source_name = "FBI Vault"
    base_url = "https://vault.fbi.gov"
    is_live = True

    async def search(self, query):
        """Return a one-element list linking to the Vault search for *query*.

        Args:
            query: Search text; it is percent-encoded before being placed
                in the URL.

        Returns:
            An empty list when ``self.allowed("/search")`` is falsy,
            otherwise a list holding a single result dict with the keys
            ``source``, ``title``, ``url``, ``snippet`` and ``agency``.
        """
        # Local import keeps this block independent of file-level imports.
        from urllib.parse import urlencode

        # NOTE(review): allowed() and _rate_limit() come from the base class;
        # presumably a crawl-policy gate and a throttle -- confirm there.
        if not self.allowed("/search"):
            return []
        await self._rate_limit()

        # Encode the query so spaces, '&', '#', '=' etc. cannot break the
        # link or smuggle extra parameters into the query string.
        params = urlencode({"SearchableText": query})
        url = f"{self.base_url}/search?{params}"
        return [{
            "source": self.source_name,
            "title": f"FBI Vault file: {query}",
            "url": url,
            "snippet": "Public FBI FOIA record",
            "agency": "FBI",
        }]
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
class NSAAdapter(GenericFOIAAdapter):
|
| 44 |
-
source_name = "NSA FOIA Reading Room"
|
| 45 |
-
base_url = "https://www.nsa.gov/resources/everyone/foia"
|
| 46 |
is_live = True
|
| 47 |
|
| 48 |
async def search(self, query):
|
| 49 |
await self._rate_limit()
|
| 50 |
return [{
|
| 51 |
"source": self.source_name,
|
| 52 |
-
"
|
|
|
|
| 53 |
"url": self.base_url,
|
| 54 |
-
"snippet": "
|
| 55 |
-
"agency": "NSA"
|
| 56 |
}]
|
| 57 |
|
| 58 |
|
| 59 |
-
class
|
| 60 |
-
source_name = "
|
| 61 |
-
base_url = "https://
|
| 62 |
is_live = True
|
| 63 |
|
| 64 |
async def search(self, query):
|
| 65 |
await self._rate_limit()
|
| 66 |
return [{
|
| 67 |
"source": self.source_name,
|
| 68 |
-
"
|
|
|
|
| 69 |
"url": self.base_url,
|
| 70 |
-
"snippet": "
|
| 71 |
-
"agency": "DoD"
|
| 72 |
}]
|
| 73 |
|
| 74 |
|
| 75 |
-
class
|
| 76 |
-
source_name = "
|
| 77 |
-
base_url = "https://
|
| 78 |
is_live = True
|
| 79 |
|
| 80 |
async def search(self, query):
|
| 81 |
await self._rate_limit()
|
| 82 |
return [{
|
| 83 |
"source": self.source_name,
|
| 84 |
-
"
|
| 85 |
-
"
|
| 86 |
-
"
|
| 87 |
-
"
|
| 88 |
-
}]
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
class HostedCollectionAdapter(GenericFOIAAdapter):
    """Adapter for collections published through another agency's site.

    Used for AATIP / SAP / Special Activities material when the documents
    are publicly hosted by a different agency (CIA / DoD / NRO).
    """

    is_live = False

    def __init__(self, label, host_agency, host_url):
        """Store the collection label, hosting agency and hosting URL.

        Args:
            label: Becomes ``source_name`` for this adapter instance.
            host_agency: Name of the agency hosting the release.
            host_url: Becomes ``base_url`` for this adapter instance.
        """
        super().__init__()
        self.source_name = label
        self.base_url = host_url
        self.host_agency = host_agency

    async def search(self, query):
        """Return a single placeholder record that mentions *query*.

        Nothing is awaited here; the record is assembled purely from the
        instance attributes set in ``__init__``.
        """
        label = self.source_name
        host = self.host_agency
        entry = dict(
            source=label,
            title=f"{label} material referencing {query}",
            url=self.base_url,
            snippet=f"Publicly released via {host}",
            agency=host,
            collection_type="Hosted Public Release",
        )
        return [entry]
|
|
|
|
| 1 |
import aiohttp
|
| 2 |
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 3 |
|
| 4 |
+
class DOJAdapter(GenericFOIAAdapter):
    """Adapter for the DOJ FOIA Library.

    ``search`` makes no request of its own in this class: after awaiting
    the inherited ``_rate_limit()`` it returns one fixed record that
    points at the library landing page.
    """

    source_name = "DOJ FOIA Library"
    base_url = "https://www.justice.gov/oip/foia-library"
    is_live = True

    async def search(self, query):
        """Return a one-element list with a placeholder DOJ record.

        Args:
            query: Search text; only interpolated into the record title.

        Returns:
            A list containing one dict with the keys ``source``,
            ``agency``, ``title``, ``url`` and ``snippet``.
        """
        # NOTE(review): _rate_limit() is defined on the base class;
        # presumably a throttle -- confirm there.
        await self._rate_limit()
        record = {
            "source": self.source_name,
            "agency": "DOJ",
            "title": f"DOJ FOIA Library reference to {query}",
            "url": self.base_url,
            "snippet": "Public DOJ FOIA library material",
        }
        return [record]
|
| 18 |
|
| 19 |
|
| 20 |
+
class DHSAdapter(GenericFOIAAdapter):
    """Adapter for the DHS FOIA Library.

    ``search`` makes no request of its own in this class: after awaiting
    the inherited ``_rate_limit()`` it returns one fixed record that
    points at the library landing page.
    """

    source_name = "DHS FOIA Library"
    base_url = "https://www.dhs.gov/foia-library"
    is_live = True

    async def search(self, query):
        """Return a one-element list with a placeholder DHS record.

        Args:
            query: Search text; only interpolated into the record title.

        Returns:
            A list containing one dict with the keys ``source``,
            ``agency``, ``title``, ``url`` and ``snippet``.
        """
        # NOTE(review): _rate_limit() is defined on the base class;
        # presumably a throttle -- confirm there.
        await self._rate_limit()
        record = {
            "source": self.source_name,
            "agency": "DHS",
            "title": f"DHS FOIA Library reference to {query}",
            "url": self.base_url,
            "snippet": "Public DHS FOIA library material",
        }
        return [record]
|
| 34 |
|
| 35 |
|
| 36 |
+
class StateDeptAdapter(GenericFOIAAdapter):
    """Adapter that produces State Department FOIA search links.

    ``search`` issues no HTTP request in this class; it returns one
    placeholder record whose URL carries the query as ``searchText``.
    """

    source_name = "State Department FOIA Reading Room"
    base_url = "https://foia.state.gov/Search"
    is_live = True

    async def search(self, query):
        """Return a one-element list linking to a reading-room search.

        Args:
            query: Search text; it is percent-encoded before being placed
                in the URL.

        Returns:
            A list containing one dict with the keys ``source``,
            ``agency``, ``title``, ``url`` and ``snippet``.
        """
        # Local import keeps this block independent of file-level imports.
        from urllib.parse import urlencode

        # NOTE(review): _rate_limit() is defined on the base class;
        # presumably a throttle -- confirm there.
        await self._rate_limit()

        # Encode the query so spaces, '&', '#', '=' etc. cannot break the
        # link or smuggle extra parameters into the query string.
        params = urlencode({"searchText": query})
        return [{
            "source": self.source_name,
            "agency": "State",
            "title": f"State Department FOIA record mentioning {query}",
            "url": f"{self.base_url}?{params}",
            "snippet": "Public State Department FOIA record",
        }]
|