GodsDevProject's picture
Upload 31 files
6aba5f3 verified
raw
history blame contribute delete
664 Bytes
import logging
from typing import Dict, List

import requests
from bs4 import BeautifulSoup
def ingest_documents(enable_scraping: bool = False) -> List[Dict]:
    """Fetch a capped list of document links from the FBI Vault index page.

    Scraping is opt-in: with the default ``enable_scraping=False`` the
    function is a no-op and returns an empty list.

    Args:
        enable_scraping: When True, perform a single read-only HTTP GET
            against https://vault.fbi.gov and extract up to 10 anchors.

    Returns:
        A list of dicts with keys ``title``, ``agency``, ``date`` and
        ``content`` (the anchor's href). Empty when scraping is disabled
        or on any network/HTTP/parse failure — this is best-effort by
        design and never raises.
    """
    if not enable_scraping:
        return []
    docs: List[Dict] = []
    try:
        r = requests.get("https://vault.fbi.gov", timeout=10)
        # Without this, a 4xx/5xx error page would be parsed and its
        # links returned as if they were real documents.
        r.raise_for_status()
        soup = BeautifulSoup(r.text, "html.parser")
        # HF-safe: cap at 10 anchors to keep the fetch bounded.
        for link in soup.select("a")[:10]:
            docs.append({
                "title": link.text.strip(),
                "agency": "FBI",
                "date": "",
                "content": link.get("href", ""),
            })
    except Exception:
        # Best-effort contract: a failed scrape returns [] rather than
        # raising — but log it instead of swallowing silently.
        logging.getLogger(__name__).warning(
            "FBI Vault scrape failed; returning empty docs", exc_info=True
        )
    return docs