Update app.py
Browse files
app.py
CHANGED
|
@@ -1,67 +1,170 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
import
|
| 3 |
from typing import List, Dict
|
| 4 |
|
| 5 |
-
from ingest.loader import ingest_documents
|
| 6 |
-
from semantic import build_faiss_index, semantic_search
|
| 7 |
from entity_graph import build_entity_graph
|
|
|
|
|
|
|
| 8 |
|
| 9 |
-
# ----------------------
|
| 10 |
-
#
|
| 11 |
-
# ----------------------
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
]
|
| 21 |
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
for d in DOCUMENTS:
|
| 32 |
-
if agencies and d["agency"] not in agencies:
|
| 33 |
-
continue
|
| 34 |
if query.lower() in d["content"].lower():
|
| 35 |
-
|
| 36 |
-
|
|
|
|
| 37 |
|
| 38 |
-
|
| 39 |
-
|
|
|
|
| 40 |
|
| 41 |
-
|
| 42 |
-
return build_entity_graph(DOCUMENTS)
|
| 43 |
|
| 44 |
-
|
| 45 |
-
#
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
|
|
|
| 53 |
|
| 54 |
-
|
| 55 |
-
gr.Button("Semantic Search").click(semantic, q, gr.Dataframe())
|
| 56 |
|
| 57 |
-
gr.
|
| 58 |
-
|
| 59 |
|
| 60 |
gr.Markdown("""
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
|
|
|
|
|
|
| 66 |
|
| 67 |
app.launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
import json
|
| 3 |
from typing import List, Dict
|
| 4 |
|
|
|
|
|
|
|
| 5 |
from entity_graph import build_entity_graph
|
| 6 |
+
from audit import log_event, export_audit_log
|
| 7 |
+
from collaboration import add_collaboration_note, get_collaboration_dataset
|
| 8 |
|
| 9 |
+
# ---------------------------------------------------------
|
| 10 |
+
# DEMO DATA (PUBLIC, UNCLASSIFIED, PREVIOUSLY RELEASED)
|
| 11 |
+
# ---------------------------------------------------------
|
| 12 |
+
|
| 13 |
+
# Demo corpus searched by the app: one record per document, each carrying
# the "title", "agency", "date" and "content" fields read by the search code.
DOCUMENTS: List[Dict] = [
    dict(
        title="AATIP Summary Memorandum",
        agency="DoD",
        date="2019-05-01",
        content="AATIP ADVANCED AEROSPACE THREAT IDENTIFICATION PROGRAM",
    ),
    dict(
        title="UAP Sensor Analysis",
        agency="NRO",
        date="2021-06-25",
        content="UAP SENSOR SATELLITE ORBIT ANALYSIS",
    ),
]
|
| 27 |
|
| 28 |
+
# ---------------------------------------------------------
|
| 29 |
+
# D3 GRAPH RENDERER (INLINE, CLIENT-SIDE ONLY)
|
| 30 |
+
# ---------------------------------------------------------
|
| 31 |
+
|
| 32 |
+
def render_d3_graph(graph_json: Dict) -> str:
    """Return an HTML snippet that draws ``graph_json`` as a D3 force graph.

    The snippet consists of a ``<div id="graph">`` mount point, a script tag
    loading D3 v7 from d3js.org, and an inline script that builds a
    650x420 SVG with a force simulation, draggable node circles and link
    lines.

    Args:
        graph_json: JSON-serialisable mapping. The embedded script reads
            ``data.nodes`` (each node's ``id`` is used as the link key and
            tooltip, ``group == "agency"`` selects the orange fill) and
            ``data.links`` (``source`` / ``target`` are resolved by node id).

    Returns:
        The HTML/JavaScript markup as a single string.
    """
    # json.dumps leaves "<", ">" and "&" untouched, so a node id containing
    # "</script>" would terminate the inline script element and let the rest
    # of the value be parsed as HTML. Rewriting those characters as \uXXXX
    # escapes keeps the payload valid JSON/JavaScript with the same decoded
    # value while making it inert to the HTML parser.
    payload = (
        json.dumps(graph_json)
        .replace("<", "\\u003c")
        .replace(">", "\\u003e")
        .replace("&", "\\u0026")
    )

    # NOTE(review): whether the hosting component executes inline <script>
    # tags is not visible from this function -- confirm against the UI layer.
    # Literal JavaScript braces are doubled because this is an f-string.
    return f"""
    <div id="graph"></div>
    <script src="https://d3js.org/d3.v7.min.js"></script>
    <script>
    const data = {payload};

    const width = 650, height = 420;

    const svg = d3.select("#graph")
        .append("svg")
        .attr("width", width)
        .attr("height", height);

    const simulation = d3.forceSimulation(data.nodes)
        .force("link", d3.forceLink(data.links).id(d => d.id).distance(90))
        .force("charge", d3.forceManyBody().strength(-250))
        .force("center", d3.forceCenter(width / 2, height / 2));

    const link = svg.append("g")
        .selectAll("line")
        .data(data.links)
        .enter().append("line")
        .style("stroke", "#aaa");

    const node = svg.append("g")
        .selectAll("circle")
        .data(data.nodes)
        .enter().append("circle")
        .attr("r", 6)
        .style("fill", d => d.group === "agency" ? "#ff7f0e" : "#1f77b4")
        .call(d3.drag()
            .on("start", dragstarted)
            .on("drag", dragged)
            .on("end", dragended));

    node.append("title").text(d => d.id);

    simulation.on("tick", () => {{
        link
            .attr("x1", d => d.source.x)
            .attr("y1", d => d.source.y)
            .attr("x2", d => d.target.x)
            .attr("y2", d => d.target.y);

        node
            .attr("cx", d => d.x)
            .attr("cy", d => d.y);
    }});

    function dragstarted(event, d) {{
        if (!event.active) simulation.alphaTarget(0.3).restart();
        d.fx = d.x;
        d.fy = d.y;
    }}

    function dragged(event, d) {{
        d.fx = event.x;
        d.fy = event.y;
    }}

    function dragended(event, d) {{
        if (!event.active) simulation.alphaTarget(0);
        d.fx = null;
        d.fy = null;
    }}
    </script>
    """
|
| 100 |
+
|
| 101 |
+
# ---------------------------------------------------------
|
| 102 |
+
# SEARCH LOGIC
|
| 103 |
+
# ---------------------------------------------------------
|
| 104 |
+
|
| 105 |
+
def run_search(query: str, agencies: List[str]):
    """Return the documents whose content contains ``query``.

    Matching is a case-insensitive substring test against each document's
    ``"content"`` field. An empty query matches every document, because the
    empty string is a substring of any string.

    Args:
        query: Text to look for. ``None`` is treated like an empty query
            instead of raising ``AttributeError``.
        agencies: Agency names to keep. An empty (or ``None``) selection
            disables the agency filter.

    Returns:
        A list of ``[title, agency, date]`` rows, in ``DOCUMENTS`` order.
    """
    # The audit entry records the caller's raw arguments, before normalising.
    log_event("search", {"query": query, "agencies": agencies})

    # Lower-case the query once rather than on every loop iteration.
    needle = (query or "").lower()
    selected = agencies or []

    return [
        [d["title"], d["agency"], d["date"]]
        for d in DOCUMENTS
        if needle in d["content"].lower()
        and (not selected or d["agency"] in selected)
    ]
|
| 114 |
|
| 115 |
+
# ---------------------------------------------------------
|
| 116 |
+
# UI
|
| 117 |
+
# ---------------------------------------------------------
|
| 118 |
|
| 119 |
+
# Top-level UI: an intro banner, four tabs (search, entity graph,
# collaboration notes, audit log) and a provenance footer.
with gr.Blocks(title="FOIA Declassified Document Search") as app:

    gr.Markdown("""
    # 🗂️ FOIA Declassified Document Search

    ⚠️ **Demo Mode – Public FOIA samples only**

    This tool analyzes *mentions*, not facts.
    No classified material. No live government systems.
    """)

    with gr.Tab("🔍 Search"):
        query = gr.Textbox(label="Search query")
        agency_filter = gr.CheckboxGroup(
            ["CIA", "DoD", "DIA", "FBI", "NRO", "NSA", "Unknown"],
            label="Filter by agency"
        )
        output = gr.Dataframe(
            headers=["Title", "Agency", "Date"],
            interactive=False
        )
        gr.Button("Search").click(run_search, [query, agency_filter], output)

    with gr.Tab("🧠 Entity Graph"):
        # The graph is computed once, while the layout is being built.
        graph = build_entity_graph(DOCUMENTS)
        gr.HTML(render_d3_graph(graph))

    with gr.Tab("🤝 Collaboration"):
        doc = gr.Textbox(label="Document title")
        note = gr.Textbox(label="Note")
        collab_out = gr.Dataframe()

        def add_note_ui(d, n):
            """Store note ``n`` for document ``d`` and return the notes table.

            When either field is empty or whitespace-only, nothing is
            logged or stored and the current table is returned unchanged.
            """
            # Blank submissions would otherwise create empty notes and
            # audit entries that do not correspond to any real annotation.
            if not (d and d.strip()) or not (n and n.strip()):
                return get_collaboration_dataset().to_pandas()

            log_event("collaboration_note", {"document": d})
            add_collaboration_note(d, n)
            return get_collaboration_dataset().to_pandas()

        gr.Button("Add note").click(add_note_ui, [doc, note], collab_out)

    with gr.Tab("🧾 Audit Log"):
        # The callable itself (not its result) is handed to the component.
        # NOTE(review): presumably evaluated when the page loads, so the
        # view may lag behind later events -- confirm against Gradio docs.
        gr.JSON(export_audit_log)

    gr.Markdown("""
    ### Dataset Provenance
    - Sources: Public FOIA releases (FBI Vault, CIA Reading Room, DoD FOIA)
    - Status: Previously released, unclassified
    - Scope: Demo subset only
    - Curation: Manual sampling for demonstration
    - Verification: No claims beyond document text
    """)

app.launch()
|