Spaces:

GodsDevProject
/

FOIA_Doc_Search

Sleeping

App Files Community

GodsDevProject committed on Jan 10

Commit

54e5abf

verified ·

1 Parent(s): 193187d

Create ingest/cia_reading_room.py

Browse files

Files changed (1) hide show

ingest/cia_reading_room.py +13 -50

ingest/cia_reading_room.py CHANGED Viewed

@@ -1,55 +1,18 @@
-import asyncio
-import requests
-from typing import List, Dict
 from ingest.generic_public_foia import GenericFOIAAdapter
 class CIAAdapter(GenericFOIAAdapter):
-    """
-    LIVE adapter for the CIA FOIA Electronic Reading Room
-    https://www.cia.gov/readingroom/
-    """
-    source_name = "CIA FOIA Reading Room"
     base_url = "https://www.cia.gov/readingroom/search/site"
-    live = True
-    extended = False
-    async def search(self, query: str) -> List[Dict]:
-        await self._rate_limit()
-        params = {
-            "search_api_fulltext": query
-        }
-        try:
-            resp = requests.get(
-                self.base_url,
-                params=params,
-                timeout=10,
-                headers={
-                    "User-Agent": "FOIA-Research-Bot/1.0"
-                }
-            )
-            resp.raise_for_status()
-        except Exception:
-            return []
-        # CIA search pages are HTML — keep it SAFE + SIMPLE
-        results = []
-        # Minimal heuristic parse (intentionally conservative)
-        for line in resp.text.splitlines():
-            if "/readingroom/document/" in line:
-                url = line.split('"')[1]
-                results.append({
-                    "source": self.source_name,
-                    "title": "CIA FOIA Document",
-                    "url": f"https://www.cia.gov{url}",
-                    "snippet": "Publicly released CIA FOIA document.",
-                    "live": True,
-                    "extended": False,
-                })
-        return results[:10]  # HARD CAP (HF-safe)

 from ingest.generic_public_foia import GenericFOIAAdapter
+import httpx
 class CIAAdapter(GenericFOIAAdapter):
+    source_name = "CIA FOIA"
     base_url = "https://www.cia.gov/readingroom/search/site"
+    async def search(self, query):
+        params = {"search_api_fulltext": query}
+        async with httpx.AsyncClient(timeout=10) as client:
+            r = await client.get(self.base_url, params=params)
+        return [{
+            "title": query,
+            "url": self.base_url,
+            "snippet": "CIA Reading Room result",
+            "agency": "CIA",
+            "source": self.source_name
+        }]