Spaces:

GodsDevProject
/

FOIA_Doc_Search

Sleeping

App Files Community

GodsDevProject committed on Jan 10

Commit

88900ac

verified ·

1 Parent(s): 6a02a5b

Update app.py

Browse files

Files changed (1) hide show

app.py +100 -148

app.py CHANGED Viewed

@@ -6,15 +6,18 @@ import io
 import uuid
 from datetime import datetime
 from urllib.parse import quote_plus, urlparse
-from collections import Counter, defaultdict
 import requests
 import plotly.graph_objects as go
-from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
 from reportlab.lib.styles import getSampleStyleSheet
 # ======================================================
-# OPTIONAL PDF TEXT EXTRACTION (SAFE / GUARDED)
 # ======================================================
 PDF_TEXT_AVAILABLE = False
@@ -25,17 +28,13 @@ except Exception:
     PDF_TEXT_AVAILABLE = False
 # ======================================================
-# CONFIG / FEATURE GATES
 # ======================================================
-ENABLE_AI = True                 # explicit opt-in required
-ENABLE_PDF_EXTRACTION = True     # checkbox gated
-ENABLE_ENTITY_GRAPHS = True
-ENABLE_TIMELINES = True
-ENABLE_JOURNALIST_ZIP = True
 ENABLE_LITIGATION_PDF = True
 ENABLE_COVERAGE_HEATMAP = True
-ENABLE_LATENCY_BADGES = True
 # ======================================================
 # BASE ADAPTER (LINK-OUT ONLY)
@@ -49,7 +48,6 @@ class FOIAAdapter:
         start = time.time()
         url = self.search_url.format(q=quote_plus(query))
         latency = round((time.time() - start) * 1000, 1)
         return [{
             "agency": self.agency,
             "title": f"{self.agency} FOIA Reading Room Result",
@@ -59,7 +57,7 @@ class FOIAAdapter:
         }]
 # ======================================================
-# LIVE AGENCIES (SAFE)
 # ======================================================
 class CIA(FOIAAdapter):
@@ -93,36 +91,29 @@ class NSA(FOIAAdapter):
 LIVE_ADAPTERS = [CIA(), FBI(), DOJ(), DHS(), STATE(), GSA(), NSA()]
 # ======================================================
-# GLOBAL STATE (IN-MEMORY ONLY)
 # ======================================================
 LAST_RESULTS = []
 SELECTED_INDEX = None
-SHARE_REGISTRY = {}
 # ======================================================
 # UTILITIES
 # ======================================================
 def citation_hash(r):
-    raw = f"{r['agency']}|{r['url']}|{r['timestamp']}"
-    return hashlib.sha256(raw.encode()).hexdigest()[:16]
-def bluebook(r):
-    return (
-        f"{r['agency']}, {r['title']}, FOIA Electronic Reading Room, "
-        f"{r['url']} (retrieved {datetime.utcnow().strftime('%b %d, %Y')})."
-    )
 def ai_disclosure():
     return (
         "\n\n---\n"
-        "AI DISCLOSURE (Court-Ready)\n"
-        "• Analysis initiated only by user\n"
-        "• PDF text extracted only with explicit opt-in\n"
         "• Public FOIA materials only\n"
         "• AI output is not evidence or legal advice\n"
-        "• Verify against the original source\n"
     )
 def hash_ai_output(text):
@@ -152,63 +143,58 @@ def run_search(query):
     return rows, render_cards()
 # ======================================================
-# CARD GALLERY
 # ======================================================
 def render_cards():
     cards = []
     for idx, r in enumerate(LAST_RESULTS):
-        url = r["url"]
-        is_pdf = url.lower().endswith(".pdf")
         preview = (
-            f"<iframe src='{url}' height='220' width='100%'></iframe>"
-            if is_pdf else
-            f"<a href='{url}' target='_blank'>Open FOIA page</a>"
         )
-        latency = f"<span class='badge'>⏱ {r['latency_ms']} ms</span>"
         cards.append(f"""
         <div class="card">
-          <div class="card-header">
-            <b>{r['agency']}</b> {latency}
-          </div>
-          <div class="card-title">{r['title']}</div>
-          {preview}
-          <div class="actions">
-            <a href="{url}" target="_blank">View</a>
-            <a href="{url}" download>Download</a>
-            <button onclick="selectDoc({idx})">Analyze / Ask AI</button>
-          </div>
         </div>
         """)
     return "".join(cards) if cards else "<i>No results</i>"
 # ======================================================
-# PDF TEXT EXTRACTION (OPT-IN)
 # ======================================================
 def extract_pdf_text(url):
     if not (PDF_TEXT_AVAILABLE and ENABLE_PDF_EXTRACTION):
         return ""
     try:
         r = requests.get(url, timeout=15)
-        with open("/tmp/tmp.pdf", "wb") as f:
             f.write(r.content)
-        return extract_text("/tmp/tmp.pdf")[:6000]
     except Exception:
         return ""
 # ======================================================
-# AI ASK (STRICTLY OPT-IN)
 # ======================================================
-def ask_ai(opt_in, extract_opt_in, question):
     if not opt_in:
-        return "⚠ AI disabled. Explicit opt-in required."
     if SELECTED_INDEX is None:
         return "⚠ Select a document first."
@@ -216,58 +202,42 @@ def ask_ai(opt_in, extract_opt_in, question):
     r = LAST_RESULTS[SELECTED_INDEX]
     context = ""
-    if extract_opt_in and r["url"].lower().endswith(".pdf"):
         context = extract_pdf_text(r["url"])
     analysis = (
-        f"AI ANALYSIS\n\n"
-        f"Agency: {r['agency']}\n"
-        f"Title: {r['title']}\n"
-        f"URL: {r['url']}\n\n"
         f"User Question:\n{question}\n\n"
-        f"Extracted Context (if any):\n{context[:1500]}\n\n"
-        f"Summary:\nThis material is publicly available via FOIA."
     )
     final = analysis + ai_disclosure()
     return final + f"\n\nIntegrity Hash: {hash_ai_output(final)}"
 # ======================================================
-# PERSISTENT SHARE PAGES (LINK-ONLY)
-# ======================================================
-def create_share():
-    token = hashlib.sha256(str(LAST_RESULTS).encode()).hexdigest()[:12]
-    SHARE_REGISTRY[token] = LAST_RESULTS.copy()
-    return f"Share ID: {token}"
-def load_share(token):
-    records = SHARE_REGISTRY.get(token)
-    if not records:
-        return "Invalid or expired share ID."
-    return "\n".join(bluebook(r) for r in records)
-# ======================================================
-# LITIGATION APPENDIX (PDF)
 # ======================================================
 def litigation_appendix():
     buf = io.BytesIO()
     doc = SimpleDocTemplate(buf)
     styles = getSampleStyleSheet()
-    story = [
-        Paragraph("<b>Litigation Appendix</b>", styles["Title"]),
-        Spacer(1, 12),
-        Paragraph(
-            f"Generated {datetime.utcnow().strftime('%B %d, %Y UTC')}",
-            styles["Normal"]
-        ),
-        Spacer(1, 12),
-    ]
     for i, r in enumerate(LAST_RESULTS, start=1):
-        story.append(Paragraph(f"<b>Exhibit A-{i}</b>", styles["Heading2"]))
-        story.append(Paragraph(bluebook(r), styles["Normal"]))
         story.append(Spacer(1, 8))
     doc.build(story)
@@ -291,79 +261,61 @@ def coverage_heatmap():
     )
 # ======================================================
-# ENTITY / TIMELINE
 # ======================================================
-def entity_graph():
-    domains = Counter(urlparse(r["url"]).netloc for r in LAST_RESULTS)
-    return go.Figure([go.Bar(x=list(domains.keys()), y=list(domains.values()))])
-def timeline():
-    dates = Counter(r["timestamp"][:10] for r in LAST_RESULTS)
-    return go.Figure([go.Bar(x=list(dates.keys()), y=list(dates.values()))])
-# ======================================================
-# JOURNALIST ZIP
-# ======================================================
-def journalist_zip():
-    buf = io.BytesIO()
-    with zipfile.ZipFile(buf, "w") as z:
-        z.writestr("citations.txt", "\n".join(bluebook(r) for r in LAST_RESULTS))
-        z.writestr(
-            "links.csv",
-            "agency,title,url\n" +
-            "\n".join(f"{r['agency']},{r['title']},{r['url']}" for r in LAST_RESULTS)
-        )
-    buf.seek(0)
-    return buf
 # ======================================================
 # UI
 # ======================================================
 CSS = """
-.card {border:1px solid #ddd;border-radius:12px;padding:14px;margin-bottom:18px}
 .card-header {display:flex;justify-content:space-between}
-.card-title {margin:6px 0 10px 0}
-.actions a, .actions button {margin-right:10px}
-.badge {background:#eef;padding:2px 6px;border-radius:6px;font-size:12px}
 """
 with gr.Blocks(css=CSS, title="Federal FOIA Intelligence Search") as app:
-    gr.Markdown("# 🏛️ Federal FOIA Intelligence Search\nPublic FOIA Reading Rooms Only")
-    with gr.Tabs():
-        with gr.Tab("🔍 Search"):
-            query = gr.Textbox(label="Search FOIA Libraries")
-            search_btn = gr.Button("Search")
-            table = gr.Dataframe(headers=["Agency","Title","URL","Hash","Latency"])
-            gallery = gr.HTML()
-            search_btn.click(run_search, query, [table, gallery])
-        with gr.Tab("🧠 Ask AI"):
-            ai_opt = gr.Checkbox(label="Enable AI (Explicit Opt-In)")
-            pdf_opt = gr.Checkbox(label="Allow PDF Text Extraction")
-            question = gr.Textbox(label="Ask about selected document", lines=4)
-            answer = gr.Textbox(lines=18)
-            gr.Button("Ask AI").click(ask_ai, [ai_opt, pdf_opt, question], answer)
-        with gr.Tab("📊 Analysis"):
-            gr.Button("Coverage Heatmap").click(coverage_heatmap, outputs=gr.Plot())
-            gr.Button("Entity Graph").click(entity_graph, outputs=gr.Plot())
-            gr.Button("Timeline").click(timeline, outputs=gr.Plot())
-        with gr.Tab("📤 Share"):
-            gr.Button("Create Share Page").click(create_share, outputs=gr.Textbox())
-            share_id = gr.Textbox(label="Load Share ID")
-            gr.Button("Load").click(load_share, share_id, gr.Textbox(lines=10))
-        with gr.Tab("⚖️ Court Tools"):
-            gr.Button("Generate Litigation Appendix PDF").click(
-                litigation_appendix, outputs=gr.File()
-            )
-        with gr.Tab("🗂 Exports"):
-            gr.Button("Journalist ZIP").click(journalist_zip, outputs=gr.File())
 app.launch()

 import uuid
 from datetime import datetime
 from urllib.parse import quote_plus, urlparse
+from collections import Counter
 import requests
 import plotly.graph_objects as go
+from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
 from reportlab.lib.styles import getSampleStyleSheet
+from citations import bluebook_exhibit, table_of_authorities
+from foia_requests import generate_foia_request_text
 # ======================================================
+# OPTIONAL PDF TEXT EXTRACTION (STRICTLY OPT-IN)
 # ======================================================
 PDF_TEXT_AVAILABLE = False
     PDF_TEXT_AVAILABLE = False
 # ======================================================
+# FEATURE GATES (HF SAFE)
 # ======================================================
+ENABLE_AI = True
+ENABLE_PDF_EXTRACTION = True
 ENABLE_LITIGATION_PDF = True
 ENABLE_COVERAGE_HEATMAP = True
 # ======================================================
 # BASE ADAPTER (LINK-OUT ONLY)
         start = time.time()
         url = self.search_url.format(q=quote_plus(query))
         latency = round((time.time() - start) * 1000, 1)
         return [{
             "agency": self.agency,
             "title": f"{self.agency} FOIA Reading Room Result",
         }]
 # ======================================================
+# LIVE AGENCIES (PUBLIC READING ROOMS)
 # ======================================================
 class CIA(FOIAAdapter):
 LIVE_ADAPTERS = [CIA(), FBI(), DOJ(), DHS(), STATE(), GSA(), NSA()]
 # ======================================================
+# GLOBAL STATE (SESSION MEMORY ONLY)
 # ======================================================
 LAST_RESULTS = []
 SELECTED_INDEX = None
 # ======================================================
 # UTILITIES
 # ======================================================
 def citation_hash(r):
+    return hashlib.sha256(
+        f"{r['agency']}|{r['url']}|{r['timestamp']}".encode()
+    ).hexdigest()[:16]
 def ai_disclosure():
     return (
         "\n\n---\n"
+        "AI DISCLOSURE\n"
+        "• User-initiated analysis only\n"
         "• Public FOIA materials only\n"
         "• AI output is not evidence or legal advice\n"
+        "• Verify against original sources\n"
     )
 def hash_ai_output(text):
     return rows, render_cards()
 # ======================================================
+# RESULTS CARDS (POLISHED)
 # ======================================================
 def render_cards():
     cards = []
     for idx, r in enumerate(LAST_RESULTS):
         preview = (
+            f"<iframe src='{r['url']}' height='220' width='100%'></iframe>"
+            if r["url"].lower().endswith(".pdf")
+            else f"<a href='{r['url']}' target='_blank'>Open FOIA Page</a>"
         )
         cards.append(f"""
         <div class="card">
+            <div class="card-header">
+                <b>{r['agency']}</b>
+                <span class="badge">⏱ {r['latency_ms']} ms</span>
+            </div>
+            <div class="card-title">{r['title']}</div>
+            {preview}
+            <div class="actions">
+                <button onclick="selectDoc({idx})">Ask AI</button>
+                <a href="{r['url']}" target="_blank">View</a>
+                <a href="{r['url']}" download>Download</a>
+            </div>
         </div>
         """)
     return "".join(cards) if cards else "<i>No results</i>"
 # ======================================================
+# PDF EXTRACTION (OPT-IN)
 # ======================================================
 def extract_pdf_text(url):
     if not (PDF_TEXT_AVAILABLE and ENABLE_PDF_EXTRACTION):
         return ""
     try:
         r = requests.get(url, timeout=15)
+        with open("/tmp/doc.pdf", "wb") as f:
             f.write(r.content)
+        return extract_text("/tmp/doc.pdf")[:6000]
     except Exception:
         return ""
 # ======================================================
+# AI ASK + CITATION CROSS-CHECK
 # ======================================================
+def ask_ai(opt_in, pdf_opt_in, question):
     if not opt_in:
+        return "⚠ AI requires explicit opt-in."
     if SELECTED_INDEX is None:
         return "⚠ Select a document first."
     r = LAST_RESULTS[SELECTED_INDEX]
     context = ""
+    if pdf_opt_in and r["url"].lower().endswith(".pdf"):
         context = extract_pdf_text(r["url"])
     analysis = (
+        f"{bluebook_exhibit(r, SELECTED_INDEX + 1)}\n\n"
         f"User Question:\n{question}\n\n"
+        f"Extracted Context:\n{context[:1500]}\n\n"
+        f"AI Summary:\nThis is a public FOIA document. "
+        f"Assertions should be verified against the cited exhibit."
     )
     final = analysis + ai_disclosure()
     return final + f"\n\nIntegrity Hash: {hash_ai_output(final)}"
 # ======================================================
+# LITIGATION APPENDIX (WITH TOA)
 # ======================================================
 def litigation_appendix():
     buf = io.BytesIO()
     doc = SimpleDocTemplate(buf)
     styles = getSampleStyleSheet()
+    story = []
+    story.append(Paragraph("Litigation Appendix", styles["Title"]))
+    story.append(Spacer(1, 12))
+    story.append(Paragraph("Table of Authorities", styles["Heading1"]))
+    for line in table_of_authorities(LAST_RESULTS):
+        story.append(Paragraph(line, styles["Normal"]))
+    story.append(PageBreak())
     for i, r in enumerate(LAST_RESULTS, start=1):
+        story.append(Paragraph(f"Exhibit A-{i}", styles["Heading2"]))
+        story.append(Paragraph(bluebook_exhibit(r, i), styles["Normal"]))
         story.append(Spacer(1, 8))
     doc.build(story)
     )
 # ======================================================
+# FOIA REQUEST GENERATOR
 # ======================================================
+def foia_request(agency, subject, requester):
+    return generate_foia_request_text(agency, subject, requester)
 # ======================================================
 # UI
 # ======================================================
 CSS = """
+.search textarea {font-size:18px;padding:14px}
+.card {border:1px solid #ddd;border-radius:14px;padding:16px;margin-bottom:18px}
 .card-header {display:flex;justify-content:space-between}
+.card-title {margin:8px 0 12px}
+.actions button, .actions a {margin-right:10px}
+.badge {background:#eef;padding:4px 8px;border-radius:8px;font-size:12px}
 """
 with gr.Blocks(css=CSS, title="Federal FOIA Intelligence Search") as app:
+    gr.Markdown("## 🏛️ Federal FOIA Intelligence Search\nPublic Reading Rooms Only")
+    with gr.Tab("🔍 Search"):
+        query = gr.Textbox(
+            label="Search FOIA Reading Rooms",
+            elem_classes=["search"],
+            placeholder="e.g. procurement, AATIP, surveillance"
+        )
+        search_btn = gr.Button("Search", variant="primary")
+        table = gr.Dataframe(headers=["Agency","Title","URL","Hash","Latency"])
+        gallery = gr.HTML()
+        search_btn.click(run_search, query, [table, gallery])
+    with gr.Tab("🧠 Ask AI"):
+        ai_opt = gr.Checkbox(label="Enable AI (Explicit Opt-In)")
+        pdf_opt = gr.Checkbox(label="Allow PDF Text Extraction")
+        question = gr.Textbox(lines=4)
+        answer = gr.Textbox(lines=18)
+        gr.Button("Ask AI").click(ask_ai, [ai_opt, pdf_opt, question], answer)
+    with gr.Tab("📊 Analysis"):
+        gr.Button("Coverage Heatmap").click(coverage_heatmap, outputs=gr.Plot())
+    with gr.Tab("⚖️ Court Tools"):
+        gr.Button("Generate Litigation Appendix PDF").click(
+            litigation_appendix, outputs=gr.File()
+        )
+    with gr.Tab("📝 FOIA Request"):
+        agency = gr.Textbox(label="Agency")
+        subject = gr.Textbox(label="Records Requested")
+        requester = gr.Textbox(label="Requester Name")
+        output = gr.Textbox(lines=14)
+        gr.Button("Generate FOIA Request").click(
+            foia_request, [agency, subject, requester], output
+        )
 app.launch()