Spaces:
Sleeping
Sleeping
| import requests | |
| from bs4 import BeautifulSoup | |
def ingest_fbi_vault(url: str) -> dict:
    """Fetch an FBI Vault page and return it as a document record.

    The page is downloaded with a 10-second timeout, parsed as HTML, and
    flattened to plain text.

    Args:
        url: Address of the page to download.

    Returns:
        A dict with the keys ``"source"`` and ``"agency"`` (fixed labels),
        ``"url"`` (the argument, unchanged), ``"title"`` (text of the first
        ``<h1>`` element, or ``"FBI Vault Document"`` when the page has
        none), and ``"text"`` (the page's visible text, capped at 10000
        characters).

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
    """
    response = requests.get(url, timeout=10)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    # Join the stripped text fragments with single spaces so words from
    # adjacent elements do not run together.
    text = soup.get_text(separator=" ", strip=True)
    heading = soup.find("h1")

    return {
        "source": "FBI Vault",
        "agency": "FBI",
        "url": url,
        "title": heading.text if heading else "FBI Vault Document",
        # Cap the stored text so a very large page cannot bloat the record.
        "text": text[:10000],
    }