Spaces:
Sleeping
Sleeping
Upload 31 files
Browse files- ingest/__init__.py +0 -0
- ingest/aatip_reading_room.py +9 -0
- ingest/agency_registry.py +39 -0
- ingest/air_force_foia_reading_room.py +9 -0
- ingest/army_foia_reading_room.py +9 -0
- ingest/cia_reading_room.py +21 -0
- ingest/coast_guard_foia_reading_room.py +9 -0
- ingest/darpa_reading_room.py +9 -0
- ingest/dhs_reading_room.py +6 -0
- ingest/dia_reading_room.py +6 -0
- ingest/dod_reading_room.py +6 -0
- ingest/dod_reading_room_live.py +20 -0
- ingest/fbi_vault.py +29 -0
- ingest/fbi_vault_live.py +20 -0
- ingest/generic_public_foia.py +46 -0
- ingest/ice_reading_room.py +6 -0
- ingest/loader.py +24 -0
- ingest/marine_corps_foia_reading_room.py +9 -0
- ingest/navy_foia_reading_room.py +9 -0
- ingest/nia_reading_room.py +9 -0
- ingest/nis_reading_room.py +9 -0
- ingest/nro_reading_room.py +9 -0
- ingest/nsa_reading_room.py +6 -0
- ingest/sap_public_releases.py +9 -0
- ingest/secret_service_reading_room.py +9 -0
- ingest/sources.py +9 -0
- ingest/space_force_foia_reading_room.py +9 -0
- ingest/special_activities_public.py +9 -0
- ingest/special_projects_public.py +9 -0
- ingest/tencap_reading_room.py +9 -0
ingest/__init__.py
ADDED
|
File without changes
|
ingest/aatip_reading_room.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 2 |
+
|
| 3 |
+
class AATIPAdapter(GenericFOIAAdapter):
    """Reading-room adapter registered under the name "AATIP".

    Scope is limited to material that has already been publicly released.
    The class only declares configuration; it defines no behaviour itself.
    """

    # Policy flag; nothing in this module enforces it.
    robots_respected = True
    # Declared request budget, in requests per second.
    rate_limit = 1
    name = "AATIP"
|
ingest/agency_registry.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ingest/agency_registry.py
|
| 2 |
+
|
| 3 |
+
# Domains are PUBLIC FOIA / reading room hosts only.
|
| 4 |
+
# Labels may include units if (and only if) a public FOIA page exists
|
| 5 |
+
# and the user provides its URL.
|
| 6 |
+
|
| 7 |
+
# Maps a public FOIA / reading-room host name to its agency label.
# Keys starting with "label:" are not hosts: they are labels applied only
# when a public FOIA URL is supplied, and make NO claim that a dedicated
# public repository exists.  Insertion order is kept as listed.
ALLOWED_FOIA_SOURCES = dict(
    (
        # Core
        ("vault.fbi.gov", "FBI"),
        ("www.cia.gov", "CIA"),
        ("www.archives.gov", "NARA"),
        ("foia.state.gov", "State Dept"),
        ("www.nsa.gov", "NSA"),
        ("www.defense.gov", "DoD"),
        ("www.esd.whs.mil", "DoD FOIA"),
        ("www.whitehouse.gov", "White House"),
        # Military (public FOIA pages)
        ("www.af.mil", "USAF"),
        ("www.navy.mil", "US Navy"),
        ("www.marines.mil", "USMC"),
        ("www.army.mil", "US Army"),
        ("www.spaceforce.mil", "US Space Force"),
        # Intelligence / defense components (public FOIA pages only)
        ("www.dia.mil", "DIA"),
        ("www.nro.gov", "NRO"),
        # Law enforcement / protective services (public FOIA pages)
        ("www.secretservice.gov", "US Secret Service"),
        ("www.dea.gov", "DEA"),
        # Historical / organizational labels (label only, see note above)
        ("label:SAC", "CIA Special Activities Center (label only)"),
        ("label:SAD", "CIA Special Activities Division (label only)"),
        ("label:NIA", "National Intelligence Authority (historical)"),
    )
)
|
ingest/air_force_foia_reading_room.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 2 |
+
|
| 3 |
+
class USAirForceAdapter(GenericFOIAAdapter):
    """Reading-room adapter registered under the name "USAirForce".

    Scope is limited to material that has already been publicly released.
    The class only declares configuration; it defines no behaviour itself.
    """

    # Policy flag; nothing in this module enforces it.
    robots_respected = True
    # Declared request budget, in requests per second.
    rate_limit = 1
    name = "USAirForce"
|
ingest/army_foia_reading_room.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 2 |
+
|
| 3 |
+
class USArmyAdapter(GenericFOIAAdapter):
    """Reading-room adapter registered under the name "USArmy".

    Scope is limited to material that has already been publicly released.
    The class only declares configuration; it defines no behaviour itself.
    """

    # Policy flag; nothing in this module enforces it.
    robots_respected = True
    # Declared request budget, in requests per second.
    rate_limit = 1
    name = "USArmy"
|
ingest/cia_reading_room.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import httpx
|
| 2 |
+
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 3 |
+
|
| 4 |
+
class CIAAdapter(GenericFOIAAdapter):
    """Adapter that queries the CIA reading-room search page."""

    base_url = "https://www.cia.gov/readingroom/search/site/"
    name = "CIA"
    # Declared policy values; nothing in this class enforces them.
    rate_limit = 1
    robots_respected = True

    async def search(self, query: str):
        """Send one GET for ``query`` and describe the resulting page.

        The search term is passed as the ``query`` URL parameter. Any
        non-200 status yields an empty list. On success the response body
        is not parsed: a single page-level hit carrying the final request
        URL is returned. Transport errors raised by httpx are not caught.
        """
        async with httpx.AsyncClient(timeout=10) as client:
            response = await client.get(self.base_url, params={"query": query})
            if response.status_code == 200:
                # Minimal safe parse: one page-level hit, no HTML scraping.
                page_hit = {
                    "source": "CIA FOIA Reading Room",
                    "query": query,
                    "url": str(response.url),
                    "snippet": "Public FOIA search result page",
                }
                return [page_hit]
            return []
|
ingest/coast_guard_foia_reading_room.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 2 |
+
|
| 3 |
+
class USCoastGuardAdapter(GenericFOIAAdapter):
    """Reading-room adapter registered under the name "USCoastGuard".

    Scope is limited to material that has already been publicly released.
    The class only declares configuration; it defines no behaviour itself.
    """

    # Policy flag; nothing in this module enforces it.
    robots_respected = True
    # Declared request budget, in requests per second.
    rate_limit = 1
    name = "USCoastGuard"
|
ingest/darpa_reading_room.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 2 |
+
|
| 3 |
+
class DARPAAdapter(GenericFOIAAdapter):
    """Reading-room adapter registered under the name "DARPA".

    Scope is limited to material that has already been publicly released.
    The class only declares configuration; it defines no behaviour itself.
    """

    # Policy flag; nothing in this module enforces it.
    robots_respected = True
    # Declared request budget, in requests per second.
    rate_limit = 1
    name = "DARPA"
|
ingest/dhs_reading_room.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 2 |
+
|
| 3 |
+
class DHSAdapter(GenericFOIAAdapter):
    """Configuration-only adapter registered under the name "DHS"."""

    # Policy flag; nothing in this module enforces it.
    robots_respected = True
    # Declared request budget, in requests per second.
    rate_limit = 1
    name = "DHS"
|
ingest/dia_reading_room.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 2 |
+
|
| 3 |
+
class DIAAdapter(GenericFOIAAdapter):
    """Configuration-only adapter registered under the name "DIA"."""

    # Policy flag; nothing in this module enforces it.
    robots_respected = True
    # Declared request budget, in requests per second.
    rate_limit = 1
    name = "DIA"
|
ingest/dod_reading_room.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 2 |
+
|
| 3 |
+
class DoDAdapter(GenericFOIAAdapter):
    """Configuration-only adapter registered under the name "DoD"."""

    # Policy flag; nothing in this module enforces it.
    robots_respected = True
    # Declared request budget, in requests per second.
    rate_limit = 1
    name = "DoD"
|
ingest/dod_reading_room_live.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import httpx
|
| 2 |
+
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 3 |
+
|
| 4 |
+
class DoDAdapter(GenericFOIAAdapter):
    """Adapter that queries the DoD FOIA reading-room page."""

    base_url = "https://www.esd.whs.mil/FOIA/Reading-Room/"
    name = "DoD FOIA Reading Room"
    # Declared policy values; nothing in this class enforces them.
    rate_limit = 1
    robots_respected = True

    async def search(self, query: str):
        """Send one GET for ``query`` and describe the resulting page.

        The search term is passed as the ``search`` URL parameter. Any
        non-200 status yields an empty list. On success the response body
        is not parsed: a single page-level hit carrying the final request
        URL is returned. Transport errors raised by httpx are not caught.
        """
        async with httpx.AsyncClient(timeout=10) as client:
            response = await client.get(self.base_url, params={"search": query})
            if response.status_code == 200:
                page_hit = {
                    "source": "DoD FOIA Reading Room",
                    "query": query,
                    "url": str(response.url),
                    "snippet": "Public DoD FOIA reading room page",
                }
                return [page_hit]
            return []
|
ingest/fbi_vault.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
from bs4 import BeautifulSoup
|
| 3 |
+
|
| 4 |
+
def ingest_fbi_vault(url: str) -> dict:
    """Download one FBI Vault page and return it as a document record.

    The previous body was two versions of the function fused together: the
    first ``return {...}`` was immediately followed by ``(r.text,
    "html.parser")``, which calls the dict and raises ``TypeError``, and the
    second version after it could never run. This is a single coherent
    implementation that keeps the first version's title fallback and
    10,000-character text cap.

    Args:
        url: Address of the page to fetch.

    Returns:
        A dict with the keys ``source``, ``agency``, ``url``, ``title`` and
        ``text``. ``title`` is the text of the first ``<h1>`` element, or
        "FBI Vault Document" when the page has none. ``text`` is the page's
        visible text truncated to 10,000 characters.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: For connection problems or timeouts.
    """
    response = requests.get(url, timeout=10)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    heading = soup.find("h1")
    page_text = soup.get_text(separator=" ", strip=True)

    return {
        "source": "FBI Vault",
        "agency": "FBI",
        "url": url,
        "title": heading.text if heading else "FBI Vault Document",
        # Cap the stored text so one very large page cannot bloat a record.
        "text": page_text[:10000],
    }
|
ingest/fbi_vault_live.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import httpx
|
| 2 |
+
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 3 |
+
|
| 4 |
+
class FBIAdapter(GenericFOIAAdapter):
    """Adapter that queries the FBI Vault search page."""

    base_url = "https://vault.fbi.gov/search"
    name = "FBI Vault"
    # Declared policy values; nothing in this class enforces them.
    rate_limit = 1
    robots_respected = True

    async def search(self, query: str):
        """Send one GET for ``query`` and describe the resulting page.

        The search term is passed as the ``q`` URL parameter. Any non-200
        status yields an empty list. On success the response body is not
        parsed: a single page-level hit carrying the final request URL is
        returned. Transport errors raised by httpx are not caught.
        """
        async with httpx.AsyncClient(timeout=10) as client:
            response = await client.get(self.base_url, params={"q": query})
            if response.status_code == 200:
                page_hit = {
                    "source": "FBI Vault",
                    "query": query,
                    "url": str(response.url),
                    "snippet": "Public FBI Vault search results",
                }
                return [page_hit]
            return []
|
ingest/generic_public_foia.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ingest/generic_public_foia.py
|
| 2 |
+
|
| 3 |
+
import requests
|
| 4 |
+
from bs4 import BeautifulSoup
|
| 5 |
+
from urllib.parse import urlparse
|
| 6 |
+
from typing import Dict
|
| 7 |
+
from .agency_registry import ALLOWED_FOIA_SOURCES
|
| 8 |
+
|
| 9 |
+
MAX_CHARS = 12000
|
| 10 |
+
|
| 11 |
+
def infer_agency_from_url(url: str, sources=None) -> str:
    """Return the agency label registered for the host of ``url``.

    A host matches a registry domain when it is exactly that domain or a
    sub-domain of it. The earlier implementation used a substring test on
    the raw ``netloc``, so look-alike hosts such as
    ``www.cia.gov.evil.example`` or ``www.cia.gov@evil.example`` were
    labelled as the real agency.

    Args:
        url: The URL to classify.
        sources: Optional mapping of domain -> agency label. Defaults to
            ``ALLOWED_FOIA_SOURCES``. Keys starting with ``"label:"`` are
            not domains and are never matched.

    Returns:
        The agency label of the first matching registry entry, or
        ``"Unknown"`` when nothing matches or the URL has no host.
    """
    registry = ALLOWED_FOIA_SOURCES if sources is None else sources

    # ``hostname`` is lower-cased and excludes userinfo and port, unlike
    # ``netloc``; a trailing root dot is dropped before comparing.
    host = (urlparse(url).hostname or "").rstrip(".")
    if not host:
        return "Unknown"

    for domain, agency in registry.items():
        if domain.startswith("label:"):
            continue
        if host == domain or host.endswith("." + domain):
            return agency

    return "Unknown"
|
| 21 |
+
|
| 22 |
+
def ingest_public_foia_url(url: str) -> Dict:
    """Fetch one user-supplied public FOIA page and return it as a record.

    SAFE ingestion:
    - user-supplied URL only
    - public FOIA / reading room pages
    - no crawling, no discovery

    The agency label comes from ``infer_agency_from_url``. An unrecognised
    host is labelled "Unknown" and is still fetched.

    Args:
        url: Address of the page to fetch.

    Returns:
        A dict with the keys ``agency``, ``url``, ``title``, ``text`` and
        ``source_type``. ``title`` is the stripped text of the first
        ``<h1>``, or "Public FOIA Document" when there is none. ``text`` is
        the page's visible text truncated to ``MAX_CHARS`` characters.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: For connection problems or timeouts.
    """
    agency = infer_agency_from_url(url)

    response = requests.get(url, timeout=15)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    heading = soup.find("h1")
    page_text = soup.get_text(separator=" ", strip=True)

    return {
        "agency": agency,
        "url": url,
        "title": heading.text.strip() if heading else "Public FOIA Document",
        "text": page_text[:MAX_CHARS],
        "source_type": "public_foia",
    }


class GenericFOIAAdapter:
    """Minimal base class for the per-agency reading-room adapters.

    The adapter modules in this package import ``GenericFOIAAdapter`` from
    this module, which previously did not define it, so each of those
    imports raised ``ImportError``. The attributes and the ``search``
    coroutine mirror what the subclasses declare or override.
    """

    # Display name; every subclass overrides it.
    name = "Generic"
    # Declared request budget, in requests per second. Not enforced here.
    rate_limit = 1
    # Declared policy flag. Not enforced here.
    robots_respected = True
    # Search endpoint; left empty for adapters that have no live search.
    base_url = ""

    async def search(self, query: str) -> list:
        """Return search hits for ``query``.

        The base implementation performs no network access and returns an
        empty list, so configuration-only subclasses can be queried safely.
        Subclasses with a live endpoint override this method.
        """
        return []
|
ingest/ice_reading_room.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 2 |
+
|
| 3 |
+
class ICEAdapter(GenericFOIAAdapter):
    """Configuration-only adapter registered under the name "ICE"."""

    # Policy flag; nothing in this module enforces it.
    robots_respected = True
    # Declared request budget, in requests per second.
    rate_limit = 1
    name = "ICE"
|
ingest/loader.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
from bs4 import BeautifulSoup
|
| 3 |
+
from typing import List, Dict
|
| 4 |
+
|
| 5 |
+
def ingest_documents(enable_scraping: bool = False) -> List[Dict]:
    """Return up to ten link records from the FBI Vault landing page.

    The fetch is best-effort: a network failure or an HTTP error status is
    logged as a warning and produces an empty list. Previously every
    exception was silently discarded, and links found on an HTTP error page
    were returned as if they were documents.

    Args:
        enable_scraping: When false (the default) no request is made and an
            empty list is returned.

    Returns:
        A list of at most ten dicts with the keys ``title`` (stripped link
        text), ``agency`` (always "FBI"), ``date`` (always empty) and
        ``content`` (the link's ``href``, or "" when it has none).
    """
    if not enable_scraping:
        return []

    # Imported locally so the module's import block needs no change.
    import logging

    # HF-safe: capped, read-only metadata fetch
    try:
        response = requests.get("https://vault.fbi.gov", timeout=10)
        response.raise_for_status()
    except requests.RequestException as exc:
        logging.getLogger(__name__).warning(
            "FBI Vault metadata fetch failed: %s", exc
        )
        return []

    soup = BeautifulSoup(response.text, "html.parser")
    return [
        {
            "title": link.text.strip(),
            "agency": "FBI",
            "date": "",
            "content": link.get("href", ""),
        }
        for link in soup.select("a")[:10]
    ]
|
ingest/marine_corps_foia_reading_room.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 2 |
+
|
| 3 |
+
class USMarineCorpsAdapter(GenericFOIAAdapter):
    """Reading-room adapter registered under the name "USMarineCorps".

    Scope is limited to material that has already been publicly released.
    The class only declares configuration; it defines no behaviour itself.
    """

    # Policy flag; nothing in this module enforces it.
    robots_respected = True
    # Declared request budget, in requests per second.
    rate_limit = 1
    name = "USMarineCorps"
|
ingest/navy_foia_reading_room.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 2 |
+
|
| 3 |
+
class USNavyAdapter(GenericFOIAAdapter):
    """Reading-room adapter registered under the name "USNavy".

    Scope is limited to material that has already been publicly released.
    The class only declares configuration; it defines no behaviour itself.
    """

    # Policy flag; nothing in this module enforces it.
    robots_respected = True
    # Declared request budget, in requests per second.
    rate_limit = 1
    name = "USNavy"
|
ingest/nia_reading_room.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 2 |
+
|
| 3 |
+
class NIAAdapter(GenericFOIAAdapter):
    """Reading-room adapter registered under the name "NIA".

    Scope is limited to material that has already been publicly released.
    The class only declares configuration; it defines no behaviour itself.
    """

    # Policy flag; nothing in this module enforces it.
    robots_respected = True
    # Declared request budget, in requests per second.
    rate_limit = 1
    name = "NIA"
|
ingest/nis_reading_room.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 2 |
+
|
| 3 |
+
class NISAdapter(GenericFOIAAdapter):
    """Reading-room adapter registered under the name "NIS".

    Scope is limited to material that has already been publicly released.
    The class only declares configuration; it defines no behaviour itself.
    """

    # Policy flag; nothing in this module enforces it.
    robots_respected = True
    # Declared request budget, in requests per second.
    rate_limit = 1
    name = "NIS"
|
ingest/nro_reading_room.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 2 |
+
|
| 3 |
+
class NROAdapter(GenericFOIAAdapter):
    """Reading-room adapter registered under the name "NRO".

    Scope is limited to material that has already been publicly released.
    The class only declares configuration; it defines no behaviour itself.
    """

    # Policy flag; nothing in this module enforces it.
    robots_respected = True
    # Declared request budget, in requests per second.
    rate_limit = 1
    name = "NRO"
|
ingest/nsa_reading_room.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 2 |
+
|
| 3 |
+
class NSAAdapter(GenericFOIAAdapter):
    """Configuration-only adapter registered under the name "NSA"."""

    # Policy flag; nothing in this module enforces it.
    robots_respected = True
    # Declared request budget, in requests per second.
    rate_limit = 1
    name = "NSA"
|
ingest/sap_public_releases.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 2 |
+
|
| 3 |
+
class SpecialAccessProgramsAdapter(GenericFOIAAdapter):
    """Reading-room adapter registered under the name "SpecialAccessPrograms".

    Scope is limited to material that has already been publicly released.
    The class only declares configuration; it defines no behaviour itself.
    """

    # Policy flag; nothing in this module enforces it.
    robots_respected = True
    # Declared request budget, in requests per second.
    rate_limit = 1
    name = "SpecialAccessPrograms"
|
ingest/secret_service_reading_room.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 2 |
+
|
| 3 |
+
class SecretServiceAdapter(GenericFOIAAdapter):
    """Reading-room adapter registered under the name "SecretService".

    Scope is limited to material that has already been publicly released.
    The class only declares configuration; it defines no behaviour itself.
    """

    # Policy flag; nothing in this module enforces it.
    robots_respected = True
    # Declared request budget, in requests per second.
    rate_limit = 1
    name = "SecretService"
|
ingest/sources.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Agency label -> the FOIA / reading-room URL listed for that agency.
AGENCY_SOURCES = dict(
    (
        ("FBI", "https://vault.fbi.gov"),
        ("CIA", "https://www.cia.gov/readingroom"),
        ("DoD", "https://www.esd.whs.mil/FOIA/"),
        ("NSA", "https://www.nsa.gov/Helpful-Links/FOIA/"),
        ("NRO", "https://www.nro.gov/FOIA/"),
        ("USAF", "https://www.afhra.af.mil/FOIA/"),
        ("White House", "https://www.archives.gov/foia"),
    )
)
|
ingest/space_force_foia_reading_room.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 2 |
+
|
| 3 |
+
class USSpaceForceAdapter(GenericFOIAAdapter):
    """Reading-room adapter registered under the name "USSpaceForce".

    Scope is limited to material that has already been publicly released.
    The class only declares configuration; it defines no behaviour itself.
    """

    # Policy flag; nothing in this module enforces it.
    robots_respected = True
    # Declared request budget, in requests per second.
    rate_limit = 1
    name = "USSpaceForce"
|
ingest/special_activities_public.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 2 |
+
|
| 3 |
+
class SpecialActivitiesAdapter(GenericFOIAAdapter):
    """Reading-room adapter registered under the name "SpecialActivities".

    Scope is limited to material that has already been publicly released.
    The class only declares configuration; it defines no behaviour itself.
    """

    # Policy flag; nothing in this module enforces it.
    robots_respected = True
    # Declared request budget, in requests per second.
    rate_limit = 1
    name = "SpecialActivities"
|
ingest/special_projects_public.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 2 |
+
|
| 3 |
+
class SpecialProjectsAdapter(GenericFOIAAdapter):
    """Reading-room adapter registered under the name "SpecialProjects".

    Scope is limited to material that has already been publicly released.
    The class only declares configuration; it defines no behaviour itself.
    """

    # Policy flag; nothing in this module enforces it.
    robots_respected = True
    # Declared request budget, in requests per second.
    rate_limit = 1
    name = "SpecialProjects"
|
ingest/tencap_reading_room.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ingest.generic_public_foia import GenericFOIAAdapter
|
| 2 |
+
|
| 3 |
+
class TENCAPAdapter(GenericFOIAAdapter):
    """Reading-room adapter registered under the name "TENCAP".

    Scope is limited to material that has already been publicly released.
    The class only declares configuration; it defines no behaviour itself.
    """

    # Policy flag; nothing in this module enforces it.
    robots_respected = True
    # Declared request budget, in requests per second.
    rate_limit = 1
    name = "TENCAP"
|