Spaces:

GodsDevProject
/

FOIA_Doc_Search

Sleeping

App Files Files Community

GodsDevProject committed on Jan 10

Commit

6dcbaae

verified ·

1 Parent(s): 144fa14

Create app.py

Browse files

Files changed (1) hide show

app.py +223 -38

app.py CHANGED Viewed

@@ -1,46 +1,231 @@
-import gradio as gr, asyncio, plotly.express as px
-from ingest.registry import get_all_adapters
-from ingest.cluster import semantic_cluster_plot
-from ingest.coverage import agency_coverage
-from ingest.citations import bluebook_pdf
-adapters = get_all_adapters()
-async def run_search(query):
-    results = []
-    for a in adapters:
-        try:
-            results.extend(await a.search(query))
-        except Exception:
-            pass
-    return results
-def search_ui(query):
-    results = asyncio.run(run_search(query))
-    coverage = agency_coverage(results)
-    heatmap = px.imshow(
-        [list(coverage.values())],
-        labels=dict(x="Agency", color="Docs"),
-        x=list(coverage.keys())
     )
-    cluster_fig = semantic_cluster_plot(results)
-    return results, heatmap, cluster_fig
-with gr.Blocks() as demo:
-    gr.Markdown("## Federal FOIA Intelligence Search")
-    gr.Markdown("*Public Electronic Reading Rooms Only*")
-    q = gr.Textbox(label="Search term")
-    btn = gr.Button("Search")
-    results_df = gr.Dataframe(label="Results")
-    heatmap_plot = gr.Plot(label="Agency Coverage")
-    cluster_plot = gr.Plot(label="Semantic Clusters")
-    pdf_btn = gr.Button("Export Bluebook PDF")
-    pdf_out = gr.File()
-    btn.click(search_ui, q, [results_df, heatmap_plot, cluster_plot])
-    pdf_btn.click(bluebook_pdf, results_df, pdf_out)
 demo.launch()

+import gradio as gr
+import pandas as pd
+import plotly.graph_objects as go
+import plotly.express as px
+# -----------------------------
+# Mock Data (replace with live)
+# -----------------------------
+# Field names of a mock record, in the column order of the resulting frame.
+_RECORD_FIELDS = ("title", "agency", "date", "year", "summary", "entities")
+# One tuple per mock document; values line up with _RECORD_FIELDS.
+_MOCK_RECORDS = (
+    (
+        "MKULTRA Behavioral Experiments",
+        "CIA",
+        "1977-08-03",
+        1977,
+        "CIA behavioral research program involving human subjects.",
+        ["CIA", "MKULTRA", "Behavioral Research"],
+    ),
+    (
+        "Human Performance Research",
+        "DoD",
+        "1975-01-12",
+        1975,
+        "DoD-funded research on human cognition and stress.",
+        ["DoD", "Cognition", "Stress"],
+    ),
+    (
+        "Signals Intelligence Overview",
+        "NSA",
+        "1981-04-21",
+        1981,
+        "Overview of SIGINT collection policies.",
+        ["NSA", "SIGINT"],
+    ),
+)
+# Demo document table: one row per record, one column per field.
+DATA = pd.DataFrame(
+    [dict(zip(_RECORD_FIELDS, record)) for record in _MOCK_RECORDS]
+)
+# Choices offered by the agency filter (a superset of the agencies in DATA).
+AGENCIES = [
+    "CIA",
+    "DoD",
+    "DIA",
+    "FBI",
+    "NRO",
+    "NSA",
+    "Unknown",
+]
+# -----------------------------
+# Search Logic
+# -----------------------------
+def run_search(query, agencies):
+    """Filter the mock FOIA records by title text and by agency.
+
+    Args:
+        query: Text to look for in the ``title`` column. It is matched as a
+            literal, case-insensitive substring. ``None``, an empty string
+            or whitespace-only text disables the title filter.
+        agencies: Agency names to keep. ``None`` or an empty selection
+            disables the agency filter.
+
+    Returns:
+        A new DataFrame with the ``title``, ``agency`` and ``date`` columns
+        of the rows of ``DATA`` that pass both filters.
+    """
+    needle = (query or "").strip()
+    df = DATA
+    if needle:
+        # regex=False: the query is free text typed by the user, so
+        # characters such as "(" or "*" must match literally instead of
+        # being compiled as a pattern (which can raise on invalid input).
+        # na=False treats a missing title as "no match".
+        title_hit = df["title"].str.contains(
+            needle, case=False, regex=False, na=False
+        )
+        df = df[title_hit]
+    if agencies:
+        df = df[df["agency"].isin(agencies)]
+    # Selecting a column list yields a new frame, so DATA is never aliased.
+    return df[["title", "agency", "date"]]
+# -----------------------------
+# Row Click → Preview
+# -----------------------------
+def preview_row(evt: gr.SelectData):
+    """Render a Markdown preview for the result row the user clicked.
+
+    Args:
+        evt: Selection event emitted by the results table.
+
+    Returns:
+        Markdown showing title, agency, date and summary of the matching
+        ``DATA`` record, or the placeholder text when no record can be
+        resolved from the event.
+    """
+    placeholder = "### Document Preview\nSelect a result"
+    # A table selection reports its index as a (row, column) pair; passing
+    # that pair to .iloc would pick two rows instead of one record.
+    position = evt.index
+    if isinstance(position, (list, tuple)):
+        position = position[0] if position else None
+    record = None
+    # The table shows *filtered* results, so its row positions do not line
+    # up with DATA. Resolve the record from the clicked row's cell values
+    # when the event carries them.
+    cells = getattr(evt, "row_value", None)
+    if cells is not None and len(cells) >= 3:
+        title, agency, date = (str(cell) for cell in cells[:3])
+        hits = DATA[
+            (DATA["title"] == title)
+            & (DATA["agency"] == agency)
+            & (DATA["date"] == date)
+        ]
+        if not hits.empty:
+            record = hits.iloc[0]
+    if record is None:
+        # NOTE(review): positional fallback for events without row values;
+        # it is only accurate while the table is showing DATA unfiltered.
+        if not isinstance(position, int) or not 0 <= position < len(DATA):
+            return placeholder
+        record = DATA.iloc[position]
+    return f"""
+### {record['title']}
+**Agency:** {record['agency']}
+**Date:** {record['date']}
+{record['summary']}
+"""
+# -----------------------------
+# Entity Graph (Plotly)
+# -----------------------------
+def entity_graph(filtered_df):
+    """Build a Plotly figure linking each agency to its documents' entities.
+
+    Args:
+        filtered_df: DataFrame with an ``agency`` column and an ``entities``
+            column whose cells are iterables of entity names.
+
+    Returns:
+        A ``go.Figure`` with one line trace for the agency-entity edges and
+        one marker+text trace for the nodes. An empty frame yields a figure
+        with empty traces.
+    """
+    import math  # local import: only needed for the circular layout below
+
+    # name -> node position index. A dict keeps first-seen order, so the
+    # layout is the same on every render (set iteration order is not).
+    node_index = {}
+    edges = []
+    for agency, entities in zip(filtered_df["agency"], filtered_df["entities"]):
+        # Register the agency itself so every edge has a real source node
+        # instead of silently falling back to node 0.
+        node_index.setdefault(agency, len(node_index))
+        for entity in entities:
+            node_index.setdefault(entity, len(node_index))
+            if entity != agency:  # a self-loop would draw nothing
+                edges.append((agency, entity))
+    nodes = list(node_index)
+    # Spread the nodes on a unit circle; with every node on one horizontal
+    # line all edges would overlap and be indistinguishable.
+    step = 2 * math.pi / len(nodes) if nodes else 0.0
+    x = [math.cos(i * step) for i in range(len(nodes))]
+    y = [math.sin(i * step) for i in range(len(nodes))]
+    edge_x, edge_y = [], []
+    for source, target in edges:
+        i, j = node_index[source], node_index[target]
+        # None breaks the line so each edge is drawn as its own segment.
+        edge_x += [x[i], x[j], None]
+        edge_y += [y[i], y[j], None]
+    fig = go.Figure()
+    fig.add_trace(go.Scatter(
+        x=edge_x, y=edge_y,
+        mode="lines",
+        line=dict(color="#334155"),
+        hoverinfo="none"
+    ))
+    fig.add_trace(go.Scatter(
+        x=x, y=y,
+        mode="markers+text",
+        marker=dict(size=18, color="#2563eb"),
+        text=nodes,
+        textposition="bottom center"
+    ))
+    fig.update_layout(
+        paper_bgcolor="#020617",
+        plot_bgcolor="#020617",
+        font_color="#e5e7eb",
+        margin=dict(l=20, r=20, t=20, b=20),
+        height=400
     )
+    return fig
+# -----------------------------
+# Coverage Heatmap
+# -----------------------------
+def coverage_heatmap():
+    """Return a density heatmap of document counts per agency and year.
+
+    The counts are computed over the whole ``DATA`` table; the figure uses
+    the dark colours shared by the other plots and is 300 px high.
+    """
+    counts = (
+        DATA.groupby(["agency", "year"])
+        .size()
+        .reset_index(name="count")
+    )
+    axes = dict(x="year", y="agency", z="count")
+    fig = px.density_heatmap(counts, color_continuous_scale="Blues", **axes)
+    dark_layout = {
+        "paper_bgcolor": "#020617",
+        "plot_bgcolor": "#020617",
+        "font_color": "#e5e7eb",
+        "height": 300,
+    }
+    fig.update_layout(**dark_layout)
+    return fig
+# -----------------------------
+# Timeline View
+# -----------------------------
+def timeline_view(filtered_df):
+    """Return a scatter timeline of documents: year on x, agency on y.
+
+    Args:
+        filtered_df: DataFrame providing ``year``, ``agency`` and ``title``
+            columns; the title is used as the hover label.
+
+    Returns:
+        A Plotly figure, 300 px high, styled with the dark colours shared
+        by the other plots.
+    """
+    marker_style = dict(size=14, color="#38bdf8")
+    dark_layout = {
+        "paper_bgcolor": "#020617",
+        "plot_bgcolor": "#020617",
+        "font_color": "#e5e7eb",
+        "height": 300,
+    }
+    fig = px.scatter(filtered_df, x="year", y="agency", hover_name="title")
+    fig.update_traces(marker=marker_style)
+    fig.update_layout(**dark_layout)
+    return fig
+# -----------------------------
+# UI
+# -----------------------------
+def _read_css(path):
+    """Return the stylesheet text at *path*, or None if it cannot be read."""
+    try:
+        with open(path, encoding="utf-8") as handle:
+            return handle.read()
+    except OSError:
+        return None
+def _documents_for(displayed):
+    """Return the rows of DATA whose titles are shown in the results table."""
+    if displayed is None or displayed.empty:
+        return DATA.iloc[0:0]
+    # The title is the first displayed column; use its position because the
+    # table's header labels differ from DATA's column names.
+    shown_titles = set(displayed.iloc[:, 0].astype(str))
+    return DATA[DATA["title"].isin(shown_titles)]
+# Pass the stylesheet *content*: depending on the Gradio version, `css` is
+# treated as CSS code, in which case a bare file path would be ignored.
+with gr.Blocks(
+    title="Federal FOIA Intelligence Search",
+    css=_read_css("static/style.css")
+) as demo:
+    gr.Markdown("# 🏛️ Federal FOIA Intelligence Search\n**Public Electronic Reading Rooms Only**")
+    with gr.Tabs():
+        with gr.Tab("🔍 Search"):
+            with gr.Row():
+                with gr.Column(scale=2):
+                    with gr.Column(elem_id="card"):
+                        query = gr.Textbox(label="Search query", placeholder="MKULTRA")
+                        agencies = gr.CheckboxGroup(
+                            AGENCIES,
+                            value=["CIA", "DoD", "NRO"],
+                            label="Filter by agency",
+                            elem_id="agency-pills"
+                        )
+                        search_btn = gr.Button("Search", elem_id="search-btn")
+                    results = gr.Dataframe(
+                        headers=["Title", "Agency", "Date"],
+                        interactive=False,
+                        elem_id="results-table"
+                    )
+                with gr.Column(scale=1):
+                    preview = gr.Markdown("### Document Preview\nSelect a result")
+            search_btn.click(
+                run_search,
+                inputs=[query, agencies],
+                outputs=results
+            )
+            results.select(preview_row, outputs=preview)
+        with gr.Tab("🧠 Entity Graph"):
+            entity_plot = gr.Plot()
+        with gr.Tab("📊 Coverage"):
+            heatmap_plot = gr.Plot()
+        with gr.Tab("⏱ Timeline"):
+            timeline_plot = gr.Plot()
+    # Reactive wiring: both plots follow the rows currently shown in the
+    # results table. The table only carries title/agency/date, so the full
+    # records are looked up in DATA before plotting.
+    results.change(
+        lambda df: entity_graph(_documents_for(df)),
+        inputs=results,
+        outputs=entity_plot
+    )
+    results.change(
+        lambda df: timeline_view(_documents_for(df)),
+        inputs=results,
+        outputs=timeline_plot
+    )
+    # The coverage heatmap does not depend on the search, so fill it once
+    # when the page loads. (Component.render() takes no arguments and only
+    # places a component in the layout; it cannot be used to set a value.)
+    demo.load(coverage_heatmap, inputs=None, outputs=heatmap_plot)
+    with gr.Column(elem_id="provenance"):
+        gr.Markdown("""
+### Dataset Provenance
+- **Sources:** Public FOIA Reading Rooms (CIA, DoD, NSA)
+- **Status:** Previously released, unclassified
+- **Scope:** Demonstration subset
+- **Verification:** No inference beyond document text
+""")
 demo.launch()