Spaces:

GodsDevProject
/

FOIA_Doc_Search

Sleeping

File size: 664 Bytes

6aba5f3

import requests
from bs4 import BeautifulSoup
from typing import List, Dict

def ingest_documents(enable_scraping: bool = False) -> List[Dict]:
    """Fetch a small, capped sample of link metadata from the FBI Vault.

    Scraping is opt-in: with ``enable_scraping`` left at ``False`` no
    network request is made and an empty list is returned.

    Args:
        enable_scraping: When true, perform a single GET against
            ``https://vault.fbi.gov`` and collect its first anchors.

    Returns:
        At most 10 dicts with the keys ``title`` (stripped anchor text),
        ``agency`` (always ``"FBI"``), ``date`` (always ``""``) and
        ``content`` (the anchor's ``href``, or ``""`` when it has none).
        An empty list is returned when scraping is disabled or when the
        fetch/parse fails; this function never raises for those failures.
    """
    if not enable_scraping:
        return []

    # Imported here so the block is self-contained; only needed on the
    # scraping path.
    import logging

    log = logging.getLogger(__name__)

    # HF-safe: capped, read-only metadata fetch
    try:
        response = requests.get("https://vault.fbi.gov", timeout=10)
        # Without this, the anchors of a 4xx/5xx error page would be
        # reported as if they were documents.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        docs = [
            {
                "title": link.text.strip(),
                "agency": "FBI",
                "date": "",
                "content": link.get("href", ""),
            }
            for link in soup.select("a")[:10]
        ]
    except requests.RequestException as exc:
        # Expected failure mode (DNS, timeout, bad status): stay
        # best-effort, but leave a trace instead of failing silently.
        log.warning("FBI Vault fetch failed: %s", exc)
        return []
    except Exception:
        # Keep the original never-raise contract for callers, but record
        # the traceback so unexpected bugs are not hidden.
        log.exception("Unexpected error while ingesting FBI Vault links")
        return []

    return docs