Spaces:
Sleeping
Sleeping
| import requests | |
| from bs4 import BeautifulSoup | |
| from typing import List, Dict | |
def ingest_documents(enable_scraping: bool = False) -> List[Dict]:
    """Fetch a small, capped list of link records from the FBI Vault front page.

    Scraping is opt-in: unless ``enable_scraping`` is true, no network request
    is made and an empty list is returned.

    Args:
        enable_scraping: When false (the default), skip all network access.

    Returns:
        A list of at most 10 dicts, one per ``<a>`` element in page order,
        each with the keys ``"title"`` (stripped anchor text), ``"agency"``
        (always ``"FBI"``), ``"date"`` (always ``""``) and ``"content"`` (the
        anchor's ``href`` attribute, or ``""`` when it has none). An empty
        list is returned when scraping is disabled, when the request fails or
        returns an HTTP error status, or when parsing fails.

    This function is best-effort and never raises; failures are logged at
    WARNING level instead of being silently discarded.
    """
    if not enable_scraping:
        return []

    # Imported locally so this block does not depend on edits to the
    # file-level import section.
    import logging

    logger = logging.getLogger(__name__)
    source_url = "https://vault.fbi.gov"
    max_links = 10  # HF-safe: capped, read-only metadata fetch

    try:
        response = requests.get(source_url, timeout=10)
        # Without this check a 4xx/5xx error page would be parsed and its
        # links returned as if they were real documents.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        return [
            {
                "title": link.text.strip(),
                "agency": "FBI",
                "date": "",
                "content": link.get("href", ""),
            }
            for link in soup.select("a")[:max_links]
        ]
    except Exception:
        # Deliberate best-effort boundary: callers always get a list back,
        # but the failure is recorded rather than swallowed.
        logger.warning(
            "Document ingestion from %s failed", source_url, exc_info=True
        )
        return []