GodsDevProject committed on
Commit
6dcbaae
·
verified ·
1 Parent(s): 144fa14

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +223 -38
app.py CHANGED
@@ -1,46 +1,231 @@
1
- import gradio as gr, asyncio, plotly.express as px
2
- from ingest.registry import get_all_adapters
3
- from ingest.cluster import semantic_cluster_plot
4
- from ingest.coverage import agency_coverage
5
- from ingest.citations import bluebook_pdf
6
-
7
- adapters = get_all_adapters()
8
-
9
async def run_search(query):
    """Query every adapter in ``adapters`` for *query* and return the combined hits.

    Adapters are awaited one after another, in list order. The search is
    best-effort: an adapter that raises is skipped so the remaining adapters
    still contribute, but the failure is logged instead of being discarded.

    Args:
        query: Search term forwarded unchanged to each adapter's ``search``.

    Returns:
        A list containing the items yielded by every adapter that succeeded.
    """
    # Local import so this block does not depend on a file-level import change.
    import logging

    log = logging.getLogger(__name__)
    results = []
    for adapter in adapters:
        try:
            results.extend(await adapter.search(query))
        except Exception:
            # Keep going on failure, but leave a traceback behind so a broken
            # adapter is visible rather than looking like "no documents".
            log.exception("Adapter %r failed for query %r", adapter, query)
    return results
17
-
18
def search_ui(query):
    """Run the adapter search synchronously and build the outputs for the UI.

    Args:
        query: Search term passed to ``run_search``.

    Returns:
        A 3-tuple ``(results, heatmap, cluster_fig)``: the raw search results,
        a one-row ``px.imshow`` figure of the per-agency counts returned by
        ``agency_coverage``, and the figure from ``semantic_cluster_plot``.
    """
    hits = asyncio.run(run_search(query))

    # agency_coverage's keys become the x-axis labels, its values the cells.
    per_agency = agency_coverage(hits)
    doc_counts = list(per_agency.values())
    agency_names = list(per_agency.keys())

    coverage_fig = px.imshow(
        [doc_counts],
        labels={"x": "Agency", "color": "Docs"},
        x=agency_names,
    )
    return hits, coverage_fig, semantic_cluster_plot(hits)
28
 
29
# Single-page layout: a search box, three result views, and a PDF export.
with gr.Blocks() as demo:
    gr.Markdown("## Federal FOIA Intelligence Search")
    gr.Markdown("*Public Electronic Reading Rooms Only*")

    # Search controls.
    q = gr.Textbox(label="Search term")
    btn = gr.Button("Search")

    # Result views filled by search_ui.
    results_df = gr.Dataframe(label="Results")
    heatmap_plot = gr.Plot(label="Agency Coverage")
    cluster_plot = gr.Plot(label="Semantic Clusters")

    # Export controls.
    pdf_btn = gr.Button("Export Bluebook PDF")
    pdf_out = gr.File()

    # Event wiring: one search fills all three views; export reads the table.
    btn.click(
        fn=search_ui,
        inputs=q,
        outputs=[results_df, heatmap_plot, cluster_plot],
    )
    pdf_btn.click(fn=bluebook_pdf, inputs=results_df, outputs=pdf_out)

demo.launch()
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import plotly.graph_objects as go
4
+ import plotly.express as px
5
+
6
+ # -----------------------------
7
+ # Mock Data (replace with live)
8
+ # -----------------------------
9
+
10
# Demonstration records, one tuple per document, in the column order given
# by `columns` below. "entities" holds a list of names per document.
DATA = pd.DataFrame(
    data=[
        (
            "MKULTRA Behavioral Experiments",
            "CIA",
            "1977-08-03",
            1977,
            "CIA behavioral research program involving human subjects.",
            ["CIA", "MKULTRA", "Behavioral Research"],
        ),
        (
            "Human Performance Research",
            "DoD",
            "1975-01-12",
            1975,
            "DoD-funded research on human cognition and stress.",
            ["DoD", "Cognition", "Stress"],
        ),
        (
            "Signals Intelligence Overview",
            "NSA",
            "1981-04-21",
            1981,
            "Overview of SIGINT collection policies.",
            ["NSA", "SIGINT"],
        ),
    ],
    columns=["title", "agency", "date", "year", "summary", "entities"],
)

# Choices offered by the agency filter in the UI.
AGENCIES = [
    "CIA",
    "DoD",
    "DIA",
    "FBI",
    "NRO",
    "NSA",
    "Unknown",
]
38
+
39
+ # -----------------------------
40
+ # Search Logic
41
+ # -----------------------------
42
+
43
def run_search(query, agencies, data=None):
    """Filter documents by title text and agency.

    Args:
        query: Text to look for in the "title" column. Matching is
            case-insensitive and literal, so characters such as "(" or "*"
            are searched for as-is instead of being parsed as a regular
            expression. Empty or whitespace-only input disables the filter.
        agencies: Agency names to keep; an empty selection disables the filter.
        data: Optional DataFrame to search instead of the module-level
            ``DATA``. It needs "title", "agency" and "date" columns.

    Returns:
        A DataFrame holding the "title", "agency" and "date" columns of the
        matching rows.
    """
    df = (DATA if data is None else data).copy()

    needle = query.strip() if query else ""
    if needle:
        # regex=False: user input is not a pattern and must never raise
        # re.error. na=False: a missing title simply does not match.
        title_hit = df["title"].str.contains(
            needle, case=False, regex=False, na=False
        )
        df = df[title_hit]

    if agencies:
        df = df[df["agency"].isin(agencies)]

    return df[["title", "agency", "date"]]
50
+
51
+ # -----------------------------
52
+ # Row Click → Preview
53
+ # -----------------------------
54
+
55
def preview_row(evt: gr.SelectData):
    """Render a Markdown preview for the table row the user selected.

    Args:
        evt: Selection event from the results table.

    Returns:
        A Markdown string with the document's title, agency, date and
        summary, or a short placeholder when no document can be resolved.
    """
    # A Dataframe selection reports a (row, column) pair, not a bare row
    # number; only the row part identifies the document.
    index = evt.index
    row_pos = index[0] if isinstance(index, (list, tuple)) else index

    # The table shows a filtered view, so its row numbers do not line up with
    # DATA. Prefer looking the document up by the title in the selected row.
    # `row_value` is read defensively in case the installed Gradio lacks it.
    row_value = getattr(evt, "row_value", None)
    row = None
    if row_value:
        matches = DATA[DATA["title"] == row_value[0]]
        if not matches.empty:
            row = matches.iloc[0]

    # Fallback: positional lookup, which is only right for an unfiltered table.
    if row is None and isinstance(row_pos, int) and 0 <= row_pos < len(DATA):
        row = DATA.iloc[row_pos]

    if row is None:
        return "### Document Preview\nSelect a result"

    return (
        f"\n### {row['title']}\n"
        "\n"
        f"**Agency:** {row['agency']}\n"
        f"**Date:** {row['date']}\n"
        "\n"
        f"{row['summary']}\n"
    )
65
+
66
+ # -----------------------------
67
+ # Entity Graph (Plotly)
68
+ # -----------------------------
69
+
70
def entity_graph(filtered_df):
    """Draw agencies and the entities named in their documents as a graph.

    Agencies are placed on the upper row and entities on the lower row, with
    one line per distinct agency/entity pair.

    Args:
        filtered_df: DataFrame with an "agency" column and an "entities"
            column holding an iterable of names for each row.

    Returns:
        A ``go.Figure`` with one line trace for the edges and one
        marker-and-text trace for the nodes. An empty frame gives a figure
        with no nodes and no edges.
    """
    # Dicts keep first-seen order, so the layout is the same on every run
    # (iteration order of a set of strings is not).
    agencies = {}
    links = {}
    for _, row in filtered_df.iterrows():
        agency = row["agency"]
        agencies.setdefault(agency, len(agencies))
        for entity in row["entities"]:
            # An agency listed among its own entities would be a self-loop.
            if entity != agency:
                links.setdefault((agency, entity), None)

    # A name that is also an agency keeps its agency slot.
    entities = {}
    for _, entity in links:
        if entity not in agencies:
            entities.setdefault(entity, len(entities))

    # name -> (x, y): agencies on y=1, entities on y=0.
    position = {name: (col, 1) for name, col in agencies.items()}
    position.update({name: (col, 0) for name, col in entities.items()})

    edge_x, edge_y = [], []
    for agency, entity in links:
        (x0, y0), (x1, y1) = position[agency], position[entity]
        # The trailing None breaks the line so the segments are not joined.
        edge_x += [x0, x1, None]
        edge_y += [y0, y1, None]

    nodes = list(position)
    x = [position[name][0] for name in nodes]
    y = [position[name][1] for name in nodes]
    # Labels point away from the edges: above agencies, below entities.
    label_side = [
        "top center" if position[name][1] else "bottom center"
        for name in nodes
    ]

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=edge_x, y=edge_y,
        mode="lines",
        line=dict(color="#334155"),
        hoverinfo="none"
    ))
    fig.add_trace(go.Scatter(
        x=x, y=y,
        mode="markers+text",
        marker=dict(size=18, color="#2563eb"),
        text=nodes,
        textposition=label_side
    ))

    fig.update_layout(
        paper_bgcolor="#020617",
        plot_bgcolor="#020617",
        font_color="#e5e7eb",
        margin=dict(l=20, r=20, t=20, b=20),
        height=400
    )
    # Leave room above and below the two rows for the node labels.
    fig.update_yaxes(range=[-0.5, 1.5])
    return fig
 
114
 
115
+ # -----------------------------
116
+ # Coverage Heatmap
117
+ # -----------------------------
118
 
119
def coverage_heatmap():
    """Build a heatmap of document counts per agency and year from ``DATA``.

    Returns:
        A Plotly figure with years on the x-axis, agencies on the y-axis and
        the number of documents as the cell colour.
    """
    heat = DATA.groupby(["agency", "year"]).size().reset_index(name="count")

    # Use the year as a category label. As a number it would be auto-binned,
    # which can merge neighbouring years into a single cell.
    heat["year"] = heat["year"].astype(str)
    year_order = sorted(heat["year"].unique())

    fig = px.density_heatmap(
        heat,
        x="year",
        y="agency",
        z="count",
        # Categories otherwise appear in first-seen order, and the grouped
        # frame is sorted by agency first, not by year.
        category_orders={"year": year_order},
        color_continuous_scale="Blues"
    )
    fig.update_layout(
        paper_bgcolor="#020617",
        plot_bgcolor="#020617",
        font_color="#e5e7eb",
        height=300
    )
    return fig
135
 
136
+ # -----------------------------
137
+ # Timeline View
138
+ # -----------------------------
139
+
140
def timeline_view(filtered_df):
    """Plot each document as a point at (year, agency).

    Args:
        filtered_df: DataFrame with "year", "agency" and "title" columns;
            the title is used as the hover label.

    Returns:
        The styled Plotly scatter figure.
    """
    marker_style = {"size": 14, "color": "#38bdf8"}
    dark_layout = {
        "paper_bgcolor": "#020617",
        "plot_bgcolor": "#020617",
        "font_color": "#e5e7eb",
        "height": 300,
    }

    timeline = px.scatter(
        filtered_df, x="year", y="agency", hover_name="title"
    )
    timeline.update_traces(marker=marker_style)
    timeline.update_layout(**dark_layout)
    return timeline
155
+
156
+ # -----------------------------
157
+ # UI
158
+ # -----------------------------
159
+
160
def _rows_for_results(results_df):
    """Return the full DATA rows for the documents listed in the results table.

    The table only carries the displayed columns, so the matching records are
    looked up in DATA by title (the table's first column).
    """
    if results_df is None or results_df.empty:
        return DATA.iloc[0:0]
    return DATA[DATA["title"].isin(results_df.iloc[:, 0])]


# NOTE(review): whether `css` accepts a file path or only a CSS string
# depends on the installed Gradio version - confirm.
with gr.Blocks(
    title="Federal FOIA Intelligence Search",
    css="static/style.css"
) as demo:

    gr.Markdown("# 🏛️ Federal FOIA Intelligence Search\n**Public Electronic Reading Rooms Only**")

    with gr.Tabs():
        with gr.Tab("🔍 Search"):
            with gr.Row():
                # Left: search controls and results table.
                with gr.Column(scale=2):
                    with gr.Column(elem_id="card"):
                        query = gr.Textbox(label="Search query", placeholder="MKULTRA")
                        agencies = gr.CheckboxGroup(
                            AGENCIES,
                            value=["CIA", "DoD", "NRO"],
                            label="Filter by agency",
                            elem_id="agency-pills"
                        )
                        search_btn = gr.Button("Search", elem_id="search-btn")

                    results = gr.Dataframe(
                        headers=["Title", "Agency", "Date"],
                        interactive=False,
                        elem_id="results-table"
                    )

                # Right: preview of the selected result.
                with gr.Column(scale=1):
                    preview = gr.Markdown("### Document Preview\nSelect a result")

            search_btn.click(
                run_search,
                inputs=[query, agencies],
                outputs=results
            )

            results.select(preview_row, outputs=preview)

        with gr.Tab("🧠 Entity Graph"):
            entity_plot = gr.Plot()

        with gr.Tab("📊 Coverage"):
            heatmap_plot = gr.Plot()

        with gr.Tab("⏱ Timeline"):
            timeline_plot = gr.Plot()

    # Reactive wiring: both plots follow the rows currently in the results
    # table instead of always drawing the whole dataset.
    results.change(
        lambda df: entity_graph(_rows_for_results(df)),
        inputs=results,
        outputs=entity_plot
    )

    results.change(
        lambda df: timeline_view(_rows_for_results(df)),
        inputs=results,
        outputs=timeline_plot
    )

    # The coverage plot does not depend on the search, so fill it once on
    # page load. (Component.render() takes no arguments and cannot be used
    # to supply a value to an already rendered component.)
    demo.load(coverage_heatmap, outputs=heatmap_plot)

    with gr.Column(elem_id="provenance"):
        gr.Markdown("""
        ### Dataset Provenance
        - **Sources:** Public FOIA Reading Rooms (CIA, DoD, NSA)
        - **Status:** Previously released, unclassified
        - **Scope:** Demonstration subset
        - **Verification:** No inference beyond document text
        """)

demo.launch()