import logging
from typing import Dict, List

logger = logging.getLogger(__name__)


def ingest_documents(
    enable_scraping: bool = False,
    url: str = "https://vault.fbi.gov",
    max_links: int = 10,
    agency: str = "FBI",
) -> List[Dict]:
    """Fetch anchor-tag metadata from *url* as a list of document records.

    Scraping is opt-in: with the default ``enable_scraping=False`` this is
    a no-op returning ``[]``, so the function is safe to call in restricted
    environments (the original's "HF-safe" intent). The heavy third-party
    imports (requests, bs4) are deferred into the scraping path for the
    same reason — the module loads even where they are not installed.

    Args:
        enable_scraping: When False (default), perform no network access.
        url: Page whose anchors are harvested. Defaults to the FBI Vault.
        max_links: Cap on the number of anchors collected.
        agency: Value stored in each record's ``"agency"`` field.

    Returns:
        A list of dicts with keys ``title`` (anchor text), ``agency``,
        ``date`` (always empty — no date is parsed here), and ``content``
        (the anchor's ``href``). Empty on any fetch/parse failure: ingest
        is deliberately best-effort, but failures are logged rather than
        silently swallowed.
    """
    if not enable_scraping:
        return []

    # Lazy imports: only the scraping path needs these third-party packages.
    import requests
    from bs4 import BeautifulSoup

    docs: List[Dict] = []
    try:
        resp = requests.get(url, timeout=10)
        # Bug fix: the original scraped links out of 4xx/5xx error pages.
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")
        docs = [
            {
                "title": link.text.strip(),
                "agency": agency,
                "date": "",
                "content": link.get("href", ""),
            }
            for link in soup.select("a")[:max_links]
        ]
    except Exception:
        # Best-effort by design: never let ingest failures propagate, but
        # leave a trace for operators (was a bare `except Exception: pass`).
        logger.exception("Document ingest failed for %s", url)
    return docs