Spaces:

GodsDevProject
/

FOIA_Doc_Search

Sleeping

App Files Files Community

GodsDevProject committed on Jan 10

Commit

7a6f3d9

verified ·

1 Parent(s): 8a325a6

Create app.py

Browse files

Files changed (1) hide show

app.py +215 -153

app.py CHANGED Viewed

@@ -1,184 +1,246 @@
 import gradio as gr
 from typing import List, Dict
-from ingest.registry import get_all_adapters
-from ingest.export import export_results
-from ingest.health import get_adapter_health
-from ingest.coverage import coverage_summary
-from ingest.discovery import agency_discovery
-from ingest.semantic import semantic_refine, semantic_available
-from ingest.timeline import release_timeline
-from ingest.latency import latency_badges
-from analytics.events import log_event
-ALL_ADAPTERS = get_all_adapters()
-def run_search(
-    query: str,
-    include_stubs: bool,
-    enable_extended: bool,
-    acknowledge_extended: bool,
-    enable_semantic: bool,
-) -> List[Dict]:
-    log_event("search", {"len": len(query or "")})
-    if not query:
-        return []
-    results = []
-    for adapter in ALL_ADAPTERS:
-        if not include_stubs and not adapter.is_live:
-            continue
-        if adapter.is_extended:
-            if not enable_extended or not acknowledge_extended:
-                continue
-        try:
-            docs = adapter.search(query)
-            for d in docs:
-                d.setdefault("agency", adapter.name)
-                d.setdefault("status", "🟢 Live" if adapter.is_live else "🔒 Stub")
-                d.setdefault("exportable", adapter.is_live)
-                results.append(d)
-        except Exception as e:
-            results.append({
-                "agency": adapter.name,
-                "title": "Adapter Error",
-                "snippet": str(e),
-                "url": "",
-                "status": "⚠️ Error",
-                "exportable": False,
-            })
-    if enable_semantic and semantic_available():
-        results = semantic_refine(query, results)
-    return results
-def table_from_results(results):
-    return [
-        [
-            r.get("agency"),
-            r.get("status"),
-            r.get("title"),
-            r.get("snippet"),
-            r.get("url"),
-        ]
-        for r in results
-    ]
-def export_handler(results):
-    exportable = [r for r in results if r.get("exportable")]
-    if not exportable:
-        return gr.File.update(visible=False)
-    return gr.File.update(value=export_results(exportable), visible=True)
-with gr.Blocks() as app:
-    gr.Markdown(
-        "# 🏛️ Federal FOIA Intelligence Search\n"
-        "Public Electronic Reading Rooms only"
-    )
-    gr.Markdown(
-        "ℹ️ Stub results are informational and cannot be exported.\n\n"
-        "Semantic refinement is optional and runs only on returned results."
-    )
-    query = gr.Textbox(label="Search query")
     with gr.Row():
-        include_stubs = gr.Checkbox(label="Include Stub Results", value=True)
-        enable_extended = gr.Checkbox(label="Enable Extended Coverage", value=False)
-        enable_semantic = gr.Checkbox(
-            label="Enable Semantic Refinement (Experimental)",
-            value=False,
-            interactive=semantic_available()
         )
-    acknowledge_extended = gr.Checkbox(
-        label="I understand some agencies block automated access",
-        value=False
-    )
     search_btn = gr.Button("Search")
-    results_state = gr.State([])
-    results_table = gr.Dataframe(
-        headers=["Agency", "Status", "Title", "Snippet", "URL"],
-        wrap=True,
-        interactive=False
     )
-    export_btn = gr.Button("Export Results (ZIP)", interactive=False)
-    export_file = gr.File(visible=False)
-    gr.Markdown("## 📊 Coverage Heatmap")
-    coverage_table = gr.Dataframe(
-        headers=["Agency", "Result Count"],
-        interactive=False
-    )
-    gr.Markdown("## 🕒 Release Timeline")
-    timeline_table = gr.Dataframe(
-        headers=["Period", "Documents"],
-        interactive=False
-    )
-    gr.Markdown("## ⚡ Agency Latency Badges")
-    latency_table = gr.Dataframe(
-        headers=["Agency", "Latency (s)", "Badge"],
-        interactive=False
-    )
-    gr.Markdown("## 🌐 Agency Discovery")
-    discovery_table = gr.Dataframe(
-        headers=["Agency", "Status", "Reason"],
-        interactive=False
     )
-    search_btn.click(
-        fn=run_search,
-        inputs=[
-            query,
-            include_stubs,
-            enable_extended,
-            acknowledge_extended,
-            enable_semantic,
-        ],
-        outputs=results_state
-    ).then(
-        fn=lambda r: (
-            table_from_results(r),
-            coverage_summary(r),
-            release_timeline(r),
-            gr.Button.update(interactive=any(x.get("exportable") for x in r))
-        ),
-        inputs=results_state,
-        outputs=[
-            results_table,
-            coverage_table,
-            timeline_table,
-            export_btn
-        ]
     )
     export_btn.click(
-        fn=export_handler,
-        inputs=results_state,
-        outputs=export_file
     )
-    latency_table.value = latency_badges(ALL_ADAPTERS)
-    discovery_table.value = agency_discovery()
-    gr.Markdown("## 🔍 Adapter Health")
-    gr.JSON(get_adapter_health())
 app.launch()

+"""
+Federal FOIA Intelligence Search
+Public Electronic Reading Rooms Only
+"""
 import gradio as gr
+import time
+from urllib.parse import quote_plus
 from typing import List, Dict
+# =========================================================
+# FEATURE FLAGS
+# =========================================================
+# Module-level switches. Neither flag is read anywhere in the visible code.
+# NOTE(review): wire them into the search/export paths or remove them.
+ENABLE_EXTENDED = True      # Allow stub / blocked agencies (opt-in)
+ENABLE_EXPORT = True        # Export dynamically gated by live results
+# =========================================================
+# BASE ADAPTER
+# =========================================================
+class FOIAAdapter:
+    """Base adapter that turns a query into a link-out result for one agency.
+
+    Subclasses override the class attributes below. ``search`` makes no
+    network request itself: it formats ``search_url`` with the URL-encoded
+    query and returns a single result dict.
+    """
+    agency: str = "UNKNOWN"
+    search_url: str = ""         # may contain a ``{q}`` placeholder for the encoded query
+    is_live: bool = True
+    robots_allowed: bool = True
+    rate_limit_sec: float = 1.0  # minimum spacing between two search() calls
+    # Class-level default so a subclass whose __init__ skips super().__init__()
+    # can still call _rate_limit() without an AttributeError.
+    _last_call: float = float("-inf")
+    def __init__(self):
+        # -inf means "never called", so the first search is never delayed.
+        self._last_call = float("-inf")
+    def _rate_limit(self):
+        """Block until ``rate_limit_sec`` has elapsed since the previous call."""
+        # monotonic() cannot jump backwards the way wall-clock time can.
+        remaining = self.rate_limit_sec - (time.monotonic() - self._last_call)
+        if remaining > 0:
+            # Sleep only for the time still owed, not the whole interval.
+            time.sleep(remaining)
+        self._last_call = time.monotonic()
+    def search(self, query: str) -> List[Dict]:
+        """Return a one-element list describing this agency's search link.
+
+        Args:
+            query: Free-text search terms; URL-encoded with ``quote_plus``
+                before being substituted for ``{q}`` in ``search_url``.
+
+        Returns:
+            A list with one dict holding the keys ``agency``, ``title``,
+            ``snippet``, ``url`` and ``is_live``.
+        """
+        self._rate_limit()
+        return [{
+            "agency": self.agency,
+            "title": f"{self.agency} FOIA Search: {query}",
+            "snippet": "Public FOIA reading room search link.",
+            "url": self.search_url.format(q=quote_plus(query)),
+            "is_live": self.is_live
+        }]
+# =========================================================
+# LIVE PUBLIC AGENCIES (SAFE LINK-OUT ONLY)
+# =========================================================
+# CIA reading room: the encoded query becomes the final path segment of the URL.
+class CIAAdapter(FOIAAdapter):
+    agency = "CIA"
+    search_url = "https://www.cia.gov/readingroom/search/site/{q}"
+# FBI Vault: the encoded query is passed in the SearchableText parameter.
+class FBIAdapter(FOIAAdapter):
+    agency = "FBI"
+    search_url = "https://vault.fbi.gov/search?SearchableText={q}"
+# DOJ FOIA library: the encoded query is passed in the "search" parameter.
+class DOJAdapter(FOIAAdapter):
+    agency = "DOJ"
+    search_url = "https://www.justice.gov/foia/library?search={q}"
+# DHS FOIA library: the encoded query is passed in the "search" parameter.
+class DHSAdapter(FOIAAdapter):
+    agency = "DHS"
+    search_url = "https://www.dhs.gov/foia-library?search={q}"
+# State Department FOIA search: the encoded query is passed in "searchText".
+class StateDeptAdapter(FOIAAdapter):
+    agency = "State Department"
+    search_url = "https://foia.state.gov/Search/Search.aspx?searchText={q}"
+# GSA FOIA library page. The URL has no {q} placeholder, so the query is not
+# part of the link: every search yields the same static URL.
+class GSAAdapter(FOIAAdapter):
+    agency = "GSA"
+    search_url = "https://www.gsa.gov/reference/freedom-of-information-act-foia/foia-library"
+# NSA FOIA reading room page. The URL has no {q} placeholder, so the query is
+# not part of the link: every search yields the same static URL.
+class NSAAdapter(FOIAAdapter):
+    agency = "NSA"
+    search_url = "https://www.nsa.gov/Helpful-Links/FOIA/FOIA-Reading-Room/"
+# =========================================================
+# STUB / BLOCKED AGENCIES (INFORMATIONAL ONLY)
+# =========================================================
+class StubAdapter(FOIAAdapter):
+    """Informational placeholder for an agency that has no search link.
+
+    ``search`` ignores the query and returns one fixed, non-live row with an
+    empty URL, so the agency shows up in results without any link-out.
+    """
+    is_live = False
+    robots_allowed = False
+    def __init__(self, agency: str):
+        # Run the base initialiser so inherited state (rate-limit bookkeeping)
+        # exists even though this class never rate-limits itself.
+        super().__init__()
+        self.agency = agency
+        self.search_url = ""
+    def search(self, query: str) -> List[Dict]:
+        """Return a single coverage-indicator row; ``query`` is not used."""
+        return [{
+            "agency": self.agency,
+            "title": "Coverage Indicator Only",
+            "snippet": "This agency does not permit automated public search.",
+            "url": "",
+            "is_live": False
+        }]
+# =========================================================
+# REGISTRY
+# =========================================================
+# One shared instance per link-out adapter, created at import time in display order.
+LIVE_ADAPTERS = [
+    adapter_cls()
+    for adapter_cls in (
+        CIAAdapter,
+        FBIAdapter,
+        DOJAdapter,
+        DHSAdapter,
+        StateDeptAdapter,
+        GSAAdapter,
+        NSAAdapter,
+    )
+]
+# One informational stub per agency name, created at import time in display order.
+STUB_ADAPTERS = [
+    StubAdapter(agency_name)
+    for agency_name in (
+        "DIA",
+        "NGA",
+        "NRO",
+        "TEN-CAP",
+        "AATIP",
+        "Special Activities",
+        "SAP",
+    )
+]
+# =========================================================
+# SEARCH ENGINE
+# =========================================================
+def run_search(query: str, include_stubs: bool):
+    """Query every registered adapter and build the rows for the results table.
+
+    Args:
+        query: Search text from the UI. Blank or whitespace-only input
+            short-circuits with an empty table and export disabled.
+        include_stubs: When true, the stub adapters are queried after the
+            live ones.
+
+    Returns:
+        A 3-tuple ``(rows, export_button_update, note)``. Each row is
+        ``[agency, source_type, title, snippet, url_or_dash, is_live]``;
+        ``export_button_update`` is a ``gr.update`` toggling interactivity;
+        ``note`` is a Markdown status line.
+    """
+    query = (query or "").strip()
+    if not query:
+        # Without a query the adapters would still emit "live" link rows and
+        # enable export for nothing; stop early instead.
+        return [], gr.update(interactive=False), "⚠️ Enter a search query to begin."
+    rows = []
+    live_count = 0
+    adapters = LIVE_ADAPTERS + (STUB_ADAPTERS if include_stubs else [])
+    for adapter in adapters:
+        try:
+            results = adapter.search(query)
+        except Exception as exc:
+            # Best-effort per adapter: one failure must not abort the search,
+            # but it is surfaced as a non-exportable row rather than hidden.
+            rows.append([
+                getattr(adapter, "agency", "UNKNOWN"),
+                "ERROR",
+                "Adapter Error",
+                str(exc),
+                "—",
+                False
+            ])
+            continue
+        for r in results:
+            if r["is_live"]:
+                live_count += 1
+            rows.append([
+                r["agency"],
+                "LIVE" if r["is_live"] else "STUB",
+                r["title"],
+                r["snippet"],
+                r["url"] if r["url"] else "—",
+                r["is_live"]
+            ])
+    # Export is only offered when at least one live row exists.
+    export_enabled = live_count > 0
+    note = (
+        "✅ Live public results found. Export enabled."
+        if export_enabled
+        else "⚠️ No live public results found. Export disabled."
+    )
+    return rows, gr.update(interactive=export_enabled), note
+# =========================================================
+# PDF PREVIEW (SAFE EMBED)
+# =========================================================
+def preview_document(url: str):
+    """Return an HTML snippet embedding ``url`` in an iframe, for PDFs only.
+
+    Args:
+        url: Candidate document URL. Anything that is not a string, is not an
+            ``http``/``https`` URL, or whose path does not end in ``.pdf``
+            (case-insensitive; query string and fragment are ignored) yields
+            the "no preview" placeholder.
+
+    Returns:
+        An HTML string: either the placeholder or an ``<iframe>`` whose
+        ``src`` is the HTML-escaped URL.
+    """
+    import html
+    from urllib.parse import urlparse
+    no_preview = "<i>No preview available</i>"
+    if not isinstance(url, str):
+        return no_preview
+    url = url.strip()
+    try:
+        parsed = urlparse(url)
+    except ValueError:
+        # urlparse rejects some malformed inputs (e.g. broken IPv6 brackets).
+        return no_preview
+    # Only plain web links are embedded; this blocks javascript:/data: URLs.
+    if parsed.scheme not in ("http", "https"):
+        return no_preview
+    # Test the path so "doc.pdf?download=1" still counts as a PDF.
+    if not parsed.path.lower().endswith(".pdf"):
+        return no_preview
+    # The value lands inside a double-quoted attribute, so quotes and angle
+    # brackets must be escaped to prevent markup injection.
+    safe_src = html.escape(url, quote=True)
+    return f"""
+    <iframe
+        src="{safe_src}"
+        width="100%"
+        height="500px"
+        style="border:1px solid #ccc;"
+    ></iframe>
+    """
+# =========================================================
+# EXPORT (LIVE ONLY)
+# =========================================================
+def export_zip(results):
+    # Placeholder export handler: `results` is accepted but never read and no
+    # archive is built; the function only returns a fixed status message.
+    # NOTE(review): the message says an export was prepared — confirm whether
+    # a real ZIP export is still to be implemented.
+    return "Export prepared (live public documents only)."
+# =========================================================
+# UI
+# =========================================================
+# Builds the Gradio page: query row, results table, export row, PDF preview,
+# and the three event bindings that connect them.
+with gr.Blocks(title="Federal FOIA Intelligence Search") as app:
+    gr.Markdown("""
+# 🏛️ Federal FOIA Intelligence Search
+### Public Electronic Reading Rooms Only
+""")
     with gr.Row():
+        query = gr.Textbox(label="Search FOIA Libraries")
+        include_stubs = gr.Checkbox(
+            label="Include Extended Coverage (Stub / Blocked Agencies)",
+            value=False
         )
     search_btn = gr.Button("Search")
+    results = gr.Dataframe(
+        headers=[
+            "Agency",
+            "Source Type",
+            "Title",
+            "Snippet",
+            "Public URL",
+            "Exportable"
+        ],
+        interactive=True,
+        wrap=True
     )
+    status_note = gr.Markdown()
+    with gr.Row():
+        export_btn = gr.Button("Export ZIP", interactive=False)
+        export_output = gr.Markdown()
+    gr.Markdown("### 📄 Document Preview (PDFs only)")
+    preview_html = gr.HTML()
+    def _preview_selected(table, evt: gr.SelectData):
+        """Preview the "Public URL" cell of whichever row was clicked."""
+        # Gradio passes event data only to a parameter annotated with
+        # gr.SelectData, which a lambda cannot carry. evt.value is just the
+        # clicked cell, so the row is looked up in the table value instead.
+        # Presumably a pandas DataFrame (Dataframe's default type) — a plain
+        # list of rows is handled too.
+        rows = table.values.tolist() if hasattr(table, "values") else list(table or [])
+        row_idx = evt.index[0] if isinstance(evt.index, (list, tuple)) else evt.index
+        if not 0 <= row_idx < len(rows):
+            return preview_document("")
+        # Column 4 is "Public URL"; coerce to str so a non-text cell cannot crash.
+        return preview_document(str(rows[row_idx][4] or ""))
+    # EVENTS
+    search_btn.click(
+        run_search,
+        inputs=[query, include_stubs],
+        outputs=[results, export_btn, status_note]
     )
+    results.select(
+        _preview_selected,
+        inputs=[results],
+        outputs=preview_html
     )
     export_btn.click(
+        export_zip,
+        inputs=[results],
+        outputs=[export_output]
     )
 app.launch()