Spaces:

GodsDevProject
/

FOIA_Doc_Search

Sleeping

App Files Files Community

GodsDevProject committed on Jan 10

Commit

2bf60a1

verified ·

1 Parent(s): ce2d379

Update app.py

Browse files

Files changed (1) hide show

app.py +127 -210

app.py CHANGED Viewed

@@ -6,29 +6,17 @@ import io
 from datetime import datetime
 from urllib.parse import quote_plus, urlparse
 from collections import defaultdict, Counter
 import plotly.graph_objects as go
 from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
 from reportlab.lib.styles import getSampleStyleSheet
-# ======================================================
-# OPTIONAL SEMANTIC DEPENDENCIES (SAFE-GUARDED)
-# ======================================================
-FAISS_AVAILABLE = False
-try:
-    import faiss
-    from sentence_transformers import SentenceTransformer
-    FAISS_AVAILABLE = True
-except Exception:
-    FAISS_AVAILABLE = False
 # ======================================================
 # CONFIG / FEATURE GATES
 # ======================================================
-ENABLE_SEMANTIC = False          # user opt-in only
-ENABLE_PDF_EXPORT = True        # LIVE results only
 ENABLE_PDF_THUMBNAILS = True
 ENABLE_ENTITY_GRAPHS = True
 ENABLE_TIMELINES = True
@@ -44,21 +32,17 @@ class FOIAAdapter:
     is_live = True
     def search(self, query):
-        start = time.time()
         url = self.search_url.format(q=quote_plus(query))
-        latency = round(time.time() - start, 3)
         return [{
             "agency": self.agency,
             "title": f"{self.agency} FOIA Search Results",
             "url": url,
-            "latency": latency,
             "is_live": self.is_live,
             "timestamp": datetime.utcnow().isoformat()
         }]
 # ======================================================
-# LIVE AGENCIES (LINK-OUT ONLY)
 # ======================================================
 class CIA(FOIAAdapter):
@@ -89,46 +73,7 @@ class NSA(FOIAAdapter):
     agency = "NSA"
     search_url = "https://www.nsa.gov/resources/everyone/foia/reading-room/?q={q}"
-LIVE_ADAPTERS = [
-    CIA(),
-    FBI(),
-    DOJ(),
-    DHS(),
-    STATE(),
-    GSA(),
-    NSA()
-]
-# ======================================================
-# STUB ADAPTERS (CLEARLY LABELED)
-# ======================================================
-class StubAdapter(FOIAAdapter):
-    is_live = False
-    def __init__(self, agency):
-        self.agency = agency
-        self.search_url = ""
-    def search(self, query):
-        return [{
-            "agency": self.agency,
-            "title": "Extended coverage indicator only (STUB)",
-            "url": "",
-            "latency": None,
-            "is_live": False,
-            "timestamp": None
-        }]
-STUB_ADAPTERS = [
-    StubAdapter("DIA"),
-    StubAdapter("NGA"),
-    StubAdapter("NRO"),
-    StubAdapter("TEN-CAP"),
-    StubAdapter("AATIP"),
-    StubAdapter("SAP"),
-    StubAdapter("Special Activities"),
-]
 # ======================================================
 # UTILITIES
@@ -138,209 +83,181 @@ def citation_hash(r):
     raw = f"{r['agency']}{r['url']}{r['timestamp']}"
     return hashlib.sha256(raw.encode()).hexdigest()[:16]
-def bluebook_full(r):
     return (
         f"{r['agency']}, {r['title']}, FOIA Electronic Reading Room, "
         f"{r['url']} (retrieved {datetime.utcnow().strftime('%b %d, %Y')})."
     )
-def bluebook_short(r):
-    return f"{r['agency']}, FOIA Reading Room, {r['url']}."
 # ======================================================
 # GLOBAL STATE
 # ======================================================
-LAST_LIVE_RECORDS = []
 # ======================================================
-# SEARCH HANDLER
 # ======================================================
-def run_search(query, include_stubs, semantic_mode):
-    global LAST_LIVE_RECORDS
-    LAST_LIVE_RECORDS = []
-    adapters = LIVE_ADAPTERS + (STUB_ADAPTERS if include_stubs else [])
     rows = []
-    coverage = defaultdict(int)
-    for adapter in adapters:
         for r in adapter.search(query):
-            coverage[r["agency"]] += 1
-            if r["is_live"]:
-                LAST_LIVE_RECORDS.append(r)
             rows.append([
                 r["agency"],
-                "LIVE" if r["is_live"] else "STUB",
                 r["title"],
                 r["url"],
-                r["latency"],
-                citation_hash(r) if r["is_live"] else "",
-                bluebook_full(r) if r["is_live"] else "Not exportable (STUB)"
             ])
-    gap_md = "### Coverage Gaps\n"
-    for agency in [a.agency for a in LIVE_ADAPTERS]:
-        if coverage.get(agency, 0) == 0:
-            gap_md += f"- ❌ **{agency}**: no public results found\n"
-    return rows, gap_md
 # ======================================================
-# SEMANTIC STATUS
 # ======================================================
-def semantic_status(enabled):
-    if enabled and not FAISS_AVAILABLE:
-        return "⚠ Semantic mode unavailable (optional dependencies missing)"
-    if enabled:
-        return "🧠 Semantic mode enabled (metadata only)"
-    return "Semantic mode off"
 # ======================================================
-# FOIA REQUEST GENERATOR (PDF)
 # ======================================================
-def generate_foia_request(requester, description):
-    buffer = io.BytesIO()
-    doc = SimpleDocTemplate(buffer)
-    styles = getSampleStyleSheet()
-    story = []
-    story.append(Paragraph("<b>Freedom of Information Act Request</b>", styles["Title"]))
-    story.append(Spacer(1, 12))
-    story.append(Paragraph(f"<b>Requester:</b> {requester}", styles["Normal"]))
-    story.append(Spacer(1, 8))
-    story.append(Paragraph("<b>Description of Records Requested:</b>", styles["Normal"]))
-    story.append(Paragraph(description, styles["Normal"]))
-    story.append(Spacer(1, 12))
-    agencies = ", ".join(sorted({r["agency"] for r in LAST_LIVE_RECORDS}))
-    story.append(Paragraph(f"<b>Agencies Referenced:</b> {agencies}", styles["Normal"]))
-    doc.build(story)
-    buffer.seek(0)
-    return buffer
 # ======================================================
-# ENTITY GRAPH + TIMELINE
 # ======================================================
-def build_entity_graph():
-    domains = Counter(urlparse(r["url"]).netloc for r in LAST_LIVE_RECORDS if r["url"])
     return go.Figure([go.Bar(x=list(domains.keys()), y=list(domains.values()))])
-def build_timeline():
-    dates = Counter(r["timestamp"][:10] for r in LAST_LIVE_RECORDS if r["timestamp"])
     return go.Figure([go.Bar(x=list(dates.keys()), y=list(dates.values()))])
 # ======================================================
-# PDF PREVIEW + ACTION BUTTONS
 # ======================================================
-def preview_selected(row):
-    if not row:
-        return "<i>Select a result</i>"
-    url = row[3]
-    if not url:
-        return "<i>No preview available (STUB)</i>"
-    buttons = f"""
-    <div style="margin-bottom:8px">
-      <a href="{url}" target="_blank">View</a> |
-      <a href="{url}" download>Download</a> |
-      <a href="{url}" target="_blank">Share</a> |
-      <i>Ask AI (link-out only)</i>
-    </div>
-    """
-    if url.lower().endswith(".pdf"):
-        return buttons + f"<iframe src='{url}' width='100%' height='520'></iframe>"
-    return buttons + f"<a href='{url}' target='_blank'>Open link</a>"
 # ======================================================
-# JOURNALIST ZIP EXPORT
 # ======================================================
-def journalist_zip():
-    buffer = io.BytesIO()
-    with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as z:
-        citations = []
-        links = []
-        for r in LAST_LIVE_RECORDS:
-            citations.append(bluebook_full(r))
-            links.append(f"{r['agency']},{r['title']},{r['url']},{r['timestamp']}")
-        z.writestr("README.txt",
-                   "Public FOIA links only.\nNo documents are included.\n")
-        z.writestr("citations.txt", "\n".join(citations))
-        z.writestr("links.csv", "agency,title,url,timestamp\n" + "\n".join(links))
-        z.writestr("pdf_links.txt",
-                   "\n".join(r["url"] for r in LAST_LIVE_RECORDS if r["url"].lower().endswith(".pdf")))
-    buffer.seek(0)
-    return buffer
 # ======================================================
 # UI
 # ======================================================
 with gr.Blocks(title="Federal FOIA Intelligence Search") as app:
-    gr.Markdown("""
-    # 🏛️ Federal FOIA Intelligence Search
-    **Public Electronic Reading Rooms Only**
-    ✔ LIVE results are exportable
-    ⚠ STUB results are informational only
-    """)
-    query = gr.Textbox(label="Search FOIA Libraries")
-    include_stubs = gr.Checkbox(label="Include Extended Coverage (STUB)", value=False)
-    semantic_toggle = gr.Checkbox(label="Enable Semantic Mode (Opt-In)", value=False)
-    search_btn = gr.Button("Search")
-    results = gr.Dataframe(
-        headers=["Agency","Type","Title","URL","Latency","Citation Hash","Citation"],
-        interactive=True
-    )
-    gap_panel = gr.Markdown()
-    preview_panel = gr.HTML()
-    semantic_status_md = gr.Markdown()
-    search_btn.click(
-        run_search,
-        inputs=[query, include_stubs, semantic_toggle],
-        outputs=[results, gap_panel]
-    )
-    semantic_toggle.change(semantic_status, semantic_toggle, semantic_status_md)
-    results.select(lambda e: preview_selected(e.value), outputs=preview_panel)
-    gr.Markdown("## FOIA Request Generator")
-    requester = gr.Textbox(label="Your Name / Organization")
-    description = gr.Textbox(label="Describe the records requested", lines=4)
-    gr.Button("Generate FOIA Request PDF").click(
-        generate_foia_request,
-        inputs=[requester, description],
-        outputs=gr.File()
-    )
-    gr.Markdown("## Analysis Tools")
-    gr.Button("Show Entity Graph").click(build_entity_graph, outputs=gr.Plot())
-    gr.Button("Show Timeline").click(build_timeline, outputs=gr.Plot())
-    if ENABLE_JOURNALIST_ZIP:
-        gr.Markdown("## Journalist Export")
-        gr.Button("Download Journalist ZIP").click(journalist_zip, outputs=gr.File())
 app.launch()

 from datetime import datetime
 from urllib.parse import quote_plus, urlparse
 from collections import defaultdict, Counter
+import requests
 import plotly.graph_objects as go
 from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
 from reportlab.lib.styles import getSampleStyleSheet
 # ======================================================
 # CONFIG / FEATURE GATES
 # ======================================================
+ENABLE_AI = True                 # explicit user opt-in required
 ENABLE_PDF_THUMBNAILS = True
 ENABLE_ENTITY_GRAPHS = True
 ENABLE_TIMELINES = True
     is_live = True
     def search(self, query):
         """Build the agency's public search link for *query*.

         No network request is made here: the query is URL-encoded and
         substituted into ``self.search_url``.

         Args:
             query: Free-text search string.

         Returns:
             A one-element list holding a record dict with the keys
             ``agency``, ``title``, ``url``, ``is_live`` and ``timestamp``
             (naive ISO-8601 string, UTC).
         """
         # Local import: the file's visible import block only brings in
         # ``datetime`` from the datetime module.
         from datetime import timezone

         link = self.search_url.format(q=quote_plus(query))
         # ``datetime.utcnow()`` is deprecated since Python 3.12. Take an aware
         # UTC "now" and drop tzinfo so isoformat() keeps the same naive shape
         # that the citation hash and timeline slice already rely on.
         stamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
         return [{
             "agency": self.agency,
             "title": f"{self.agency} FOIA Search Results",
             "url": link,
             "is_live": self.is_live,
             "timestamp": stamp,
         }]
 # ======================================================
+# LIVE AGENCIES
 # ======================================================
 class CIA(FOIAAdapter):
     agency = "NSA"
     search_url = "https://www.nsa.gov/resources/everyone/foia/reading-room/?q={q}"
+LIVE_ADAPTERS = [CIA(), FBI(), DOJ(), DHS(), STATE(), GSA(), NSA()]
 # ======================================================
 # UTILITIES
     raw = f"{r['agency']}{r['url']}{r['timestamp']}"
     return hashlib.sha256(raw.encode()).hexdigest()[:16]
def bluebook(r):
    """Format a search record as a Bluebook-style citation string.

    Args:
        r: Mapping with at least the ``"agency"``, ``"title"`` and ``"url"``
            keys.

    Returns:
        A single-line citation ending with the current UTC date as the
        retrieval date.
    """
    # Local import: the file's visible import block only brings in
    # ``datetime`` from the datetime module.
    from datetime import timezone

    # ``datetime.utcnow()`` is deprecated since Python 3.12; an aware UTC
    # "now" formats to the same calendar date.
    retrieved = datetime.now(timezone.utc).strftime("%b %d, %Y")
    return (
        f"{r['agency']}, {r['title']}, FOIA Electronic Reading Room, "
        f"{r['url']} (retrieved {retrieved})."
    )
def ai_disclosure():
    """Return the fixed disclosure footer appended to AI answers.

    The footer is a horizontal-rule separator, an ``AI DISCLOSURE`` heading
    and one bullet line per disclosure item, each terminated by a newline.
    """
    heading = "\n\n---\nAI DISCLOSURE\n"
    items = (
        "User-initiated analysis only",
        "PDF processed only when explicitly requested",
        "Public FOIA documents only",
        "Not legal advice or a primary source",
        "Verify against the original record",
    )
    return heading + "".join(f"• {item}\n" for item in items)
 # ======================================================
 # GLOBAL STATE
 # ======================================================
+LAST_RESULTS = []
+SELECTED_DOC = None
 # ======================================================
+# SEARCH
 # ======================================================
def run_search(query):
    """Query every live adapter and refresh the module-level result cache.

    Each record returned by an adapter gets a ``"hash"`` key (from
    ``citation_hash``) and is stored in ``LAST_RESULTS``, which is rebound
    to a fresh list on every call.

    Args:
        query: Search text forwarded unchanged to each adapter's ``search``.

    Returns:
        A ``(rows, cards_html)`` tuple: ``rows`` is a list of
        ``[agency, title, url, hash]`` lists, ``cards_html`` is whatever
        ``render_cards()`` produces for the refreshed cache.
    """
    global LAST_RESULTS

    # Rebind the global first so a failing adapter leaves the partial results
    # gathered so far, not the previous search.
    LAST_RESULTS = collected = []
    table_rows = []
    for source in LIVE_ADAPTERS:
        for record in source.search(query):
            record["hash"] = citation_hash(record)
            collected.append(record)
            table_rows.append(
                [record[key] for key in ("agency", "title", "url", "hash")]
            )
    return table_rows, render_cards()
 # ======================================================
+# CARD / THUMBNAIL GALLERY
 # ======================================================
def render_cards():
    """Render every record in ``LAST_RESULTS`` as an HTML card.

    Each card shows the agency, the title, a preview (an inline ``<iframe>``
    when the URL path ends in ``.pdf``, otherwise a plain link) and a row of
    action links. The "Ask AI" link calls the page-level ``selectDoc`` helper
    with the record's index in ``LAST_RESULTS``.

    All record values are HTML-escaped before interpolation so that markup or
    quote characters in a title or URL cannot break out of the card.

    Returns:
        The concatenated card markup, or ``"<i>No results</i>"`` when there
        are no records.
    """
    # Local imports keep this block self-contained.
    from html import escape
    from urllib.parse import urlparse

    cards = []
    for idx, r in enumerate(LAST_RESULTS):
        raw_url = str(r["url"])
        # quote=True also escapes ' and ", so the value is safe inside both
        # the single-quoted and the double-quoted attributes used below.
        url = escape(raw_url, quote=True)
        agency = escape(str(r["agency"]))
        title = escape(str(r["title"]))
        # Inspect only the path so "doc.pdf?download=1" still counts as a PDF.
        is_pdf = urlparse(raw_url).path.lower().endswith(".pdf")
        preview = (
            f"<iframe src='{url}' width='100%' height='200'></iframe>"
            if is_pdf else
            f"<a href='{url}' target='_blank'>Open link</a>"
        )
        cards.append(f"""
        <div style="border:1px solid #ccc;border-radius:10px;padding:12px;margin-bottom:16px">
          <b>{agency}</b><br>
          {title}<br><br>
          {preview}
          <div style="margin-top:8px">
            <a href="{url}" target="_blank">View</a> |
            <a href="{url}" download>Download</a> |
            <a href="{url}" target="_blank">Share</a> |
            <a href="#" onclick="selectDoc({idx})">Ask AI</a>
          </div>
        </div>
        """)
    return "".join(cards) if cards else "<i>No results</i>"
 # ======================================================
+# AI ASK (PDF ONLY WHEN CLICKED)
 # ======================================================
def ask_ai(opt_in, question):
    """Produce the canned "AI analysis" text for the selected document.

    Args:
        opt_in: Truthy when the user has ticked the explicit AI opt-in box.
        question: The user's question; echoed verbatim into the answer.

    Returns:
        A warning string when AI is not opted in or no document is selected;
        otherwise a summary of the selected record's agency, title and URL,
        the question, a fixed analysis paragraph and the ``ai_disclosure()``
        footer.
    """
    if not opt_in:
        return "⚠ AI disabled. Explicit opt-in required."

    # NOTE(review): no code visible in this file ever assigns SELECTED_DOC a
    # non-None value — confirm a selection handler exists elsewhere.
    doc = SELECTED_DOC
    if doc is None:
        return "⚠ Select a document first."

    header = (
        f"AI ANALYSIS\n\n"
        f"Agency: {doc['agency']}\n"
        f"Title: {doc['title']}\n"
        f"URL: {doc['url']}\n\n"
    )
    prompt = f"Question:\n{question}\n\n"
    analysis = (
        "Analysis:\n"
        "This document is publicly available via FOIA. "
        "Key themes, entities, and relevance should be reviewed directly in the source."
    )
    return header + prompt + analysis + ai_disclosure()
 # ======================================================
+# ENTITY + TIMELINE
 # ======================================================
def entity_graph():
    """Return a bar chart of how many cached results point at each host.

    Hosts are the ``netloc`` part of every record URL in ``LAST_RESULTS``.
    """
    per_host = Counter()
    for record in LAST_RESULTS:
        per_host[urlparse(record["url"]).netloc] += 1
    bars = go.Bar(x=list(per_host), y=list(per_host.values()))
    return go.Figure([bars])
def timeline():
    """Return a bar chart of cached results grouped by timestamp date.

    The date is the first ten characters of each record's ``"timestamp"``
    string in ``LAST_RESULTS``.
    """
    per_day = Counter()
    for record in LAST_RESULTS:
        per_day[record["timestamp"][:10]] += 1
    bars = go.Bar(x=list(per_day), y=list(per_day.values()))
    return go.Figure([bars])
 # ======================================================
+# JOURNALIST ZIP
 # ======================================================
def journalist_zip():
    """Bundle the cached search results into an in-memory ZIP archive.

    The archive holds three text members built from ``LAST_RESULTS``:
    ``README.txt`` (a fixed notice), ``citations.txt`` (one ``bluebook``
    citation per line) and ``links.csv`` (agency, title, url).

    Returns:
        An ``io.BytesIO`` positioned at offset 0 that contains the archive.
    """
    # Local import keeps this block self-contained.
    import csv

    # csv.writer quotes any field containing a comma, quote or newline; the
    # earlier hand-joined rows would split such a field across columns.
    table = io.StringIO(newline="")
    writer = csv.writer(table, lineterminator="\n")
    writer.writerow(["agency", "title", "url"])
    writer.writerows([r["agency"], r["title"], r["url"]] for r in LAST_RESULTS)

    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w") as z:
        z.writestr("README.txt", "Public FOIA links only.\nNo documents included.")
        z.writestr("citations.txt", "\n".join(bluebook(r) for r in LAST_RESULTS))
        z.writestr("links.csv", table.getvalue())
    # Rewind so a consumer reading from the buffer starts at the first byte.
    # NOTE(review): this value is wired to a gr.File output, which normally
    # takes a file path — confirm a BytesIO is accepted by the Gradio version
    # in use.
    buf.seek(0)
    return buf
 # ======================================================
+# JS HELPERS
 # ======================================================
+JS = """
+<script>
+function selectDoc(idx){
+  fetch(`/select/${idx}`);
+  alert("Document selected for AI analysis");
+}
+</script>
+"""
 # ======================================================
 # UI
 # ======================================================
 # Top-level UI: one Blocks app with tabs for search, the document gallery,
 # the AI prompt, the analysis charts and the ZIP export.
 with gr.Blocks(title="Federal FOIA Intelligence Search") as app:
     # NOTE(review): JS defines selectDoc(), which fetches /select/<idx>; no
     # handler for that path is visible in this file — confirm one exists.
     gr.HTML(JS)
     with gr.Tabs():
         with gr.Tab("🔍 Search"):
             query = gr.Textbox(label="Search FOIA Libraries")
             search_btn = gr.Button("Search")
             table = gr.Dataframe(headers=["Agency","Title","URL","Hash"])
         with gr.Tab("📄 Documents"):
             # A component can be rendered only once per Blocks. The gallery
             # used to be created in the Search tab and then .render()-ed
             # again here, which Gradio rejects as a duplicate block, so it
             # is now created once, in this tab.
             gallery = gr.HTML()
         # Bound after both output components exist.
         search_btn.click(run_search, query, [table, gallery])
         with gr.Tab("🧠 AI Ask"):
             ai_opt = gr.Checkbox(label="Enable AI (Explicit Opt-In)")
             question = gr.Textbox(label="Ask about selected document", lines=4)
             answer = gr.Textbox(lines=14)
             gr.Button("Ask AI").click(ask_ai, [ai_opt, question], answer)
         with gr.Tab("📊 Analysis"):
             gr.Button("Entity Graph").click(entity_graph, outputs=gr.Plot())
             gr.Button("Timeline").click(timeline, outputs=gr.Plot())
         with gr.Tab("🗂 Exports"):
             gr.Button("Journalist ZIP").click(journalist_zip, outputs=gr.File())
 app.launch()