File size: 664 Bytes
6aba5f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import logging
from typing import List, Dict

import requests
from bs4 import BeautifulSoup

def ingest_documents(enable_scraping: bool = False) -> List[Dict]:
    """Collect a small, capped list of link records from the FBI Vault page.

    Scraping is opt-in: unless ``enable_scraping`` is true, no network
    request is made and an empty list is returned immediately.

    Args:
        enable_scraping: When true, issue a single GET request to
            ``https://vault.fbi.gov`` and turn its first anchors into
            records.

    Returns:
        At most 10 dicts, one per ``<a>`` element in document order, with
        the keys ``title`` (stripped anchor text), ``agency`` (always
        ``"FBI"``), ``date`` (always ``""``) and ``content`` (the anchor's
        ``href``, or ``""`` when it has none). The list is empty when
        scraping is disabled or the page could not be fetched.

    The function is best-effort: fetch and parse failures are logged rather
    than raised, and any records gathered before a failure are still
    returned.
    """
    if not enable_scraping:
        return []

    logger = logging.getLogger(__name__)
    source_url = "https://vault.fbi.gov"
    max_links = 10

    # HF-safe: capped, read-only metadata fetch
    docs: List[Dict] = []
    try:
        response = requests.get(source_url, timeout=10)
        # Without this check an HTTP error page (4xx/5xx) would be parsed
        # and its navigation links returned as if they were documents.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        # A plain loop (not a comprehension) so records collected before an
        # unexpected failure are kept.
        for link in soup.select("a")[:max_links]:
            docs.append({
                "title": link.text.strip(),
                "agency": "FBI",
                "date": "",
                "content": link.get("href", ""),
            })
    except requests.RequestException as exc:
        # Expected failure mode: timeout, DNS/connection error, bad status.
        logger.warning("Could not fetch %s: %s", source_url, exc)
    except Exception:
        # Keep the never-raise contract, but leave a traceback in the logs
        # instead of silently discarding the error.
        logger.exception("Unexpected error while ingesting %s", source_url)

    return docs