GodsDevProject committed on
Commit
803ee48
·
verified ·
1 Parent(s): e4e1f70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +149 -46
app.py CHANGED
@@ -1,67 +1,170 @@
1
  import gradio as gr
2
- import pandas as pd
3
  from typing import List, Dict
4
 
5
- from ingest.loader import ingest_documents
6
- from semantic import build_faiss_index, semantic_search
7
  from entity_graph import build_entity_graph
 
 
8
 
9
- # ----------------------
10
- # CONFIG
11
- # ----------------------
12
- ENABLE_SCRAPING = False # HF SAFE DEFAULT
13
-
14
- # ----------------------
15
- # BASE DATA
16
- # ----------------------
17
- DOCUMENTS = [
18
- {"title": "MKULTRA Overview", "agency": "CIA", "date": "1977", "content": "MKULTRA program"},
19
- {"title": "UAP Report", "agency": "DoD", "date": "2021", "content": "UAP assessment"}
 
 
 
 
 
 
20
  ]
21
 
22
- DOCUMENTS += ingest_documents(ENABLE_SCRAPING)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- FAISS_INDEX, _ = build_faiss_index(DOCUMENTS)
 
 
 
25
 
26
- # ----------------------
27
- # SEARCH
28
- # ----------------------
29
- def search(query, agencies):
30
- results = []
 
 
 
 
 
 
 
 
 
 
 
31
  for d in DOCUMENTS:
32
- if agencies and d["agency"] not in agencies:
33
- continue
34
  if query.lower() in d["content"].lower():
35
- results.append(d)
36
- return pd.DataFrame(results)
 
37
 
38
- def semantic(query):
39
- return pd.DataFrame(semantic_search(query, DOCUMENTS, FAISS_INDEX))
 
40
 
41
- def entity_graph_json():
42
- return build_entity_graph(DOCUMENTS)
43
 
44
- # ----------------------
45
- # UI
46
- # ----------------------
47
- with gr.Blocks() as app:
48
- gr.Markdown("# 🗂️ FOIA Declassified Document Search (Advanced)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
- with gr.Row():
51
- q = gr.Textbox(label="Search")
52
- agency = gr.CheckboxGroup(["CIA", "DoD", "FBI", "NSA", "NRO", "USAF"], label="Agency")
 
53
 
54
- gr.Button("Keyword Search").click(search, [q, agency], gr.Dataframe())
55
- gr.Button("Semantic Search").click(semantic, q, gr.Dataframe())
56
 
57
- gr.Markdown("### Entity Graph (JSON)")
58
- gr.JSON(entity_graph_json)
59
 
60
  gr.Markdown("""
61
- ### Dataset Provenance
62
- - Public FOIA releases only
63
- - Demo + optional ingestion
64
- - No intelligence claims
65
- """)
 
 
66
 
67
  app.launch()
 
1
  import gradio as gr
2
+ import json
3
  from typing import List, Dict
4
 
 
 
5
  from entity_graph import build_entity_graph
6
+ from audit import log_event, export_audit_log
7
+ from collaboration import add_collaboration_note, get_collaboration_dataset
8
 
9
+ # ---------------------------------------------------------
10
+ # DEMO DATA (PUBLIC, UNCLASSIFIED, PREVIOUSLY RELEASED)
11
+ # ---------------------------------------------------------
12
+
13
+ DOCUMENTS: List[Dict] = [
14
+ {
15
+ "title": "AATIP Summary Memorandum",
16
+ "agency": "DoD",
17
+ "date": "2019-05-01",
18
+ "content": "AATIP ADVANCED AEROSPACE THREAT IDENTIFICATION PROGRAM"
19
+ },
20
+ {
21
+ "title": "UAP Sensor Analysis",
22
+ "agency": "NRO",
23
+ "date": "2021-06-25",
24
+ "content": "UAP SENSOR SATELLITE ORBIT ANALYSIS"
25
+ }
26
  ]
27
 
28
+ # ---------------------------------------------------------
29
+ # D3 GRAPH RENDERER (INLINE, CLIENT-SIDE ONLY)
30
+ # ---------------------------------------------------------
31
+
32
def render_d3_graph(graph_json: Dict) -> str:
    """Return an HTML snippet that draws ``graph_json`` as a D3 force layout.

    The snippet contains a ``<div id="graph">`` container, a ``<script>`` tag
    that loads D3 v7 from d3js.org, and an inline script that builds a
    650x420 SVG with draggable nodes.

    Args:
        graph_json: JSON-serialisable mapping. The inline script reads
            ``nodes`` (objects with ``id`` and ``group``) and ``links``
            (objects with ``source`` and ``target``) from it.

    Returns:
        The HTML/JavaScript markup as a single string.
    """
    # json.dumps leaves "<", ">" and "&" untouched, so a node id containing
    # "</script>" would close the inline script early and the remainder would
    # be parsed as HTML. These characters can only occur inside JSON string
    # literals, where the \uXXXX form is an equivalent spelling, so the data
    # the script sees is unchanged while the HTML parser sees nothing special.
    payload = (
        json.dumps(graph_json)
        .replace("<", "\\u003c")
        .replace(">", "\\u003e")
        .replace("&", "\\u0026")
    )

    # NOTE(review): whether an inline <script> runs depends on how the host
    # component inserts this markup into the page -- confirm that the graph
    # actually renders in the deployed Gradio version.
    return f"""
    <div id="graph"></div>
    <script src="https://d3js.org/d3.v7.min.js"></script>
    <script>
    const data = {payload};

    const width = 650, height = 420;

    const svg = d3.select("#graph")
        .append("svg")
        .attr("width", width)
        .attr("height", height);

    const simulation = d3.forceSimulation(data.nodes)
        .force("link", d3.forceLink(data.links).id(d => d.id).distance(90))
        .force("charge", d3.forceManyBody().strength(-250))
        .force("center", d3.forceCenter(width / 2, height / 2));

    const link = svg.append("g")
        .selectAll("line")
        .data(data.links)
        .enter().append("line")
        .style("stroke", "#aaa");

    const node = svg.append("g")
        .selectAll("circle")
        .data(data.nodes)
        .enter().append("circle")
        .attr("r", 6)
        .style("fill", d => d.group === "agency" ? "#ff7f0e" : "#1f77b4")
        .call(d3.drag()
            .on("start", dragstarted)
            .on("drag", dragged)
            .on("end", dragended));

    node.append("title").text(d => d.id);

    simulation.on("tick", () => {{
        link
            .attr("x1", d => d.source.x)
            .attr("y1", d => d.source.y)
            .attr("x2", d => d.target.x)
            .attr("y2", d => d.target.y);

        node
            .attr("cx", d => d.x)
            .attr("cy", d => d.y);
    }});

    function dragstarted(event, d) {{
        if (!event.active) simulation.alphaTarget(0.3).restart();
        d.fx = d.x;
        d.fy = d.y;
    }}

    function dragged(event, d) {{
        d.fx = event.x;
        d.fy = event.y;
    }}

    function dragended(event, d) {{
        if (!event.active) simulation.alphaTarget(0);
        d.fx = null;
        d.fy = null;
    }}
    </script>
    """
100
+
101
+ # ---------------------------------------------------------
102
+ # SEARCH LOGIC
103
+ # ---------------------------------------------------------
104
+
105
def run_search(query: str, agencies: List[str]):
    """Return table rows for documents whose content contains ``query``.

    Matching is a case-insensitive substring test against each document's
    ``content``. When ``agencies`` is non-empty, only documents whose
    ``agency`` is in that list are kept. Every call is recorded through
    ``log_event`` with the query and agency filter as given.

    Args:
        query: Text to look for. ``None`` is treated like an empty string,
            which matches every document.
        agencies: Agency names to keep; an empty or ``None`` value disables
            the filter.

    Returns:
        A list of ``[title, agency, date]`` lists, in ``DOCUMENTS`` order.
    """
    log_event("search", {"query": query, "agencies": agencies})

    # Lower-case the query once instead of once per document, and tolerate a
    # missing query rather than failing on ``None.lower()``.
    needle = (query or "").lower()

    return [
        [doc["title"], doc["agency"], doc["date"]]
        for doc in DOCUMENTS
        if needle in doc["content"].lower()
        and (not agencies or doc["agency"] in agencies)
    ]
114
 
115
+ # ---------------------------------------------------------
116
+ # UI
117
+ # ---------------------------------------------------------
118
 
119
# Top-level UI definition: four tabs (search, entity graph, collaboration
# notes, audit log) followed by a provenance footer, then the app is launched.
with gr.Blocks(title="FOIA Declassified Document Search") as app:

    gr.Markdown("""
# 🗂️ FOIA Declassified Document Search

⚠️ **Demo Mode – Public FOIA samples only**

This tool analyzes *mentions*, not facts.
No classified material. No live government systems.
""")

    with gr.Tab("🔍 Search"):
        query = gr.Textbox(label="Search query")
        agency_filter = gr.CheckboxGroup(
            ["CIA", "DoD", "DIA", "FBI", "NRO", "NSA", "Unknown"],
            label="Filter by agency"
        )
        output = gr.Dataframe(
            headers=["Title", "Agency", "Date"],
            interactive=False
        )
        gr.Button("Search").click(run_search, [query, agency_filter], output)

    with gr.Tab("🧠 Entity Graph"):
        # The graph is built once, while the UI is being constructed, from
        # the module-level DOCUMENTS list.
        graph = build_entity_graph(DOCUMENTS)
        gr.HTML(render_d3_graph(graph))

    with gr.Tab("🤝 Collaboration"):
        doc = gr.Textbox(label="Document title")
        note = gr.Textbox(label="Note")
        collab_out = gr.Dataframe()

        def add_note_ui(d, n):
            """Store note ``n`` for document ``d`` and return the notes table.

            Raises ``gr.Error`` when either field is blank so that empty rows
            are never written to the collaboration dataset.
            """
            if not (d or "").strip() or not (n or "").strip():
                raise gr.Error("Both a document title and a note are required.")

            # Only the document title is sent to the audit log; the note text
            # itself is passed to add_collaboration_note alone.
            log_event("collaboration_note", {"document": d})
            add_collaboration_note(d, n)
            return get_collaboration_dataset().to_pandas()

        gr.Button("Add note").click(add_note_ui, [doc, note], collab_out)

    with gr.Tab("🧾 Audit Log"):
        # The callable itself (not its result) is handed to gr.JSON.
        gr.JSON(export_audit_log)

    gr.Markdown("""
### Dataset Provenance
- Sources: Public FOIA releases (FBI Vault, CIA Reading Room, DoD FOIA)
- Status: Previously released, unclassified
- Scope: Demo subset only
- Curation: Manual sampling for demonstration
- Verification: No claims beyond document text
""")

app.launch()