GodsDevProject committed on
Commit
9c8150d
·
verified ·
1 Parent(s): e77dfd9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -45
app.py CHANGED
@@ -1,90 +1,111 @@
1
  import gradio as gr
2
  import asyncio
3
 
4
- from ingest.registry import get_all_adapters
5
- from ingest.cluster import semantic_clusters
6
  from ingest.export import export_results_zip
7
- from ingest.health import check_health
8
 
9
- ADAPTERS = get_all_adapters()
 
10
 
async def federated_search(query: str):
    """Query every registered adapter and return the combined hits.

    Adapters are awaited one at a time, in the iteration order of
    ``ADAPTERS``. The search is best-effort: an adapter that raises is
    skipped so one bad source cannot abort the whole search, but the
    failure is logged rather than discarded silently.

    Args:
        query: Search term forwarded unchanged to each adapter's ``search``.

    Returns:
        A list containing the items of every successful adapter response,
        in adapter order.
    """
    import logging  # local import keeps this block self-contained

    logger = logging.getLogger(__name__)
    results = []
    for adapter in ADAPTERS.values():
        try:
            # Both the call and the merge stay inside the ``try`` so that a
            # malformed response is skipped exactly like a failed request.
            results.extend(await adapter.search(query))
        except Exception:
            logger.exception(
                "Adapter %s failed for query %r; skipping it",
                type(adapter).__name__,
                query,
            )
    return results
20
 
def run_search(query):
    """Run a federated search and shape the hits for the UI.

    Args:
        query: Text from the search box. ``None``, an empty string, or a
            whitespace-only string counts as "no query".

    Returns:
        A ``(rows, clusters)`` tuple. ``rows`` is a list of
        ``[source, title, url, snippet]`` lists, one per hit. ``clusters``
        is whatever ``semantic_clusters`` returns for the hit snippets, or
        ``[]`` when there are no hits. Both are empty for a missing query.
    """
    # Guard against None as well as blank text: calling ``.strip()`` on a
    # missing value would raise AttributeError instead of returning no rows.
    if not query or not query.strip():
        return [], []

    results = asyncio.run(federated_search(query))

    rows = [
        [hit["source"], hit["title"], hit["url"], hit["snippet"]]
        for hit in results
    ]

    snippets = [hit["snippet"] for hit in results]
    clusters = semantic_clusters(snippets) if snippets else []

    return rows, clusters
36
-
def export_zip_handler(table):
    """Convert the results table back into records and export them.

    Args:
        table: Current value of the results table. This may be a sequence
            of 4-item rows or a pandas-style frame (anything exposing
            ``to_numpy``). Gradio's ``Dataframe`` component hands handlers
            a pandas ``DataFrame`` unless it is configured with
            ``type="array"``. ``None`` is treated as an empty table.

    Returns:
        Whatever ``export_results_zip`` returns for the rebuilt records.
    """
    if table is None:
        rows = []
    elif hasattr(table, "to_numpy"):
        # Iterating a DataFrame yields column labels, not rows, so convert
        # it to plain row lists before indexing positions 0-3.
        rows = table.to_numpy().tolist()
    else:
        rows = list(table)

    results = [
        {
            "source": row[0],
            "title": row[1],
            "url": row[2],
            "snippet": row[3],
        }
        for row in rows
    ]
    return export_results_zip(results)
47
 
# Build the Gradio UI: search box, results table, ZIP export and a static
# per-source health listing.
with gr.Blocks() as app:
    gr.Markdown(
        "# **Federal FOIA Intelligence Search**\n"
        "### Public Electronic Reading Rooms Only\n\n"
        "Live, robots-compliant search across U.S. government FOIA libraries."
    )

    query = gr.Textbox(label="Search term", placeholder="e.g. UAP, procurement, surveillance")
    search_btn = gr.Button("Search")

    results_table = gr.Dataframe(
        headers=["Agency", "Title", "URL", "Snippet"],
        interactive=False,
        wrap=True,
        # Hand the export handler plain row lists. The default ("pandas")
        # passes a DataFrame, whose iteration yields column labels.
        type="array",
    )

    # Holds the cluster output of the latest search; it is not rendered.
    cluster_output = gr.State()

    search_btn.click(
        fn=run_search,
        inputs=query,
        outputs=[results_table, cluster_output],
    )

    gr.Markdown("### Export")
    export_btn = gr.Button("Export Results (ZIP)")
    zip_file = gr.File(label="Download ZIP")

    export_btn.click(
        fn=export_zip_handler,
        inputs=results_table,
        outputs=zip_file,
    )

    # Health is probed once, while the layout is being built, so the values
    # shown are a snapshot from start-up rather than live status.
    gr.Markdown("### Source Health")
    for adapter in ADAPTERS.values():
        health = check_health(adapter)
        gr.Markdown(
            f"- **{health['source']}**: {health['status']} "
            f"({health['latency_ms']} ms)"
        )

# Start the server only when executed as a script, so importing this module
# (for tests or tooling) does not launch it.
if __name__ == "__main__":
    app.launch()
 
1
  import gradio as gr
2
  import asyncio
3
 
4
+ from ingest.registry import get_live_adapters, get_stub_adapters
 
5
  from ingest.export import export_results_zip
 
6
 
7
+ LIVE_ADAPTERS = get_live_adapters()
8
+ STUB_ADAPTERS = get_stub_adapters()
9
 
async def federated_search(query: str, include_stubs: bool = False):
    """Query the live adapters, and optionally the stub adapters.

    Adapters are awaited one at a time: every adapter in ``LIVE_ADAPTERS``
    first, then, only when ``include_stubs`` is true, every adapter in
    ``STUB_ADAPTERS``. The search is best-effort: an adapter that raises is
    skipped so one bad source cannot abort the whole search, but the
    failure is logged rather than discarded silently.

    Args:
        query: Search term forwarded unchanged to each adapter's ``search``.
        include_stubs: Whether to also query the stub adapters. Defaults to
            ``False``, matching the UI's default live-only mode.

    Returns:
        A list containing the items of every successful adapter response,
        live adapters first.
    """
    import itertools  # local imports keep this block self-contained
    import logging

    logger = logging.getLogger(__name__)

    adapters = LIVE_ADAPTERS.values()
    if include_stubs:
        # One loop serves both registries; live sources keep priority.
        adapters = itertools.chain(adapters, STUB_ADAPTERS.values())

    results = []
    for adapter in adapters:
        try:
            # Both the call and the merge stay inside the ``try`` so that a
            # malformed response is skipped exactly like a failed request.
            results.extend(await adapter.search(query))
        except Exception:
            logger.exception(
                "Adapter %s failed for query %r; skipping it",
                type(adapter).__name__,
                query,
            )
    return results
29
 
def search_handler(query, include_stubs, confirm_extended):
    """Validate the search form, run the search and build the table rows.

    Args:
        query: Text from the search box. ``None``, an empty string, or a
            whitespace-only string counts as "no query".
        include_stubs: Whether stub results were requested.
        confirm_extended: Whether the Extended Features acknowledgement
            box is ticked. It is required whenever ``include_stubs`` is set.

    Returns:
        A ``(rows, status)`` tuple. ``rows`` is a list of
        ``[source, title, url, snippet]`` lists. ``status`` is a warning
        message when validation fails, otherwise an empty string.
    """
    # The acknowledgement check comes first, so a user who enabled stubs
    # sees that warning even when the query is also empty.
    if include_stubs and not confirm_extended:
        return [], "⚠️ You must acknowledge the Extended Features warning."

    # Guard against None as well as blank text: calling ``.strip()`` on a
    # missing value would raise AttributeError instead of showing a warning.
    if not query or not query.strip():
        return [], "⚠️ Enter a search term."

    results = asyncio.run(federated_search(query, include_stubs))

    rows = [
        [hit["source"], hit["title"], hit["url"], hit["snippet"]]
        for hit in results
    ]

    return rows, ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
def _export_rows(rows):
    """Rebuild result records from table rows and export them as a ZIP.

    Args:
        rows: Sequence of ``[source, title, url, snippet]`` rows, or
            ``None`` for an empty table.

    Returns:
        Whatever ``export_results_zip`` returns for the rebuilt records.
    """
    return export_results_zip([
        {
            "source": row[0],
            "title": row[1],
            "url": row[2],
            "snippet": row[3],
        }
        for row in (rows if rows is not None else [])
    ])


# Build the Gradio UI: search form with the opt-in stub controls, status
# line, results table and ZIP export.
with gr.Blocks() as app:
    gr.Markdown(
        "# **Federal FOIA Intelligence Search**\n"
        "### Public Electronic Reading Rooms Only\n\n"
        "Live, robots-compliant search across U.S. government FOIA libraries.\n\n"
        "**Default mode queries only agencies that explicitly permit automated access.**"
    )

    query = gr.Textbox(
        label="Search term",
        placeholder="e.g. UAP, procurement, surveillance",
    )

    include_stubs = gr.Checkbox(
        label="Include Stub Results (Non-Live, Informational Only)",
        value=False,
    )

    with gr.Accordion("⚠️ Extended Features (Advanced – Read Carefully)", open=False):
        gr.Markdown(
            "**The following agencies do NOT permit automated querying or do not "
            "provide public FOIA search endpoints:**\n\n"
            "- NSA\n- NRO\n- SAP / Special Access Programs\n- TEN-CAP\n"
            "- AATIP\n- Special Activities\n- DIA\n- NGA\n\n"
            "If enabled, these sources return **clearly labeled stub results only**. "
            "**No live requests are made.**"
        )

        # search_handler rejects stub searches unless this box is ticked.
        confirm_extended = gr.Checkbox(
            label="I understand and want to include stub-only results",
            value=False,
        )

    search_btn = gr.Button("Search")

    status = gr.Markdown("")
    results_table = gr.Dataframe(
        headers=["Source", "Title", "URL", "Snippet"],
        wrap=True,
        interactive=False,
        # Hand the export handler plain row lists. The default ("pandas")
        # passes a DataFrame, whose iteration yields column labels.
        type="array",
    )

    search_btn.click(
        fn=search_handler,
        inputs=[query, include_stubs, confirm_extended],
        outputs=[results_table, status],
    )

    gr.Markdown("### Export")
    export_btn = gr.Button("Export Results (ZIP)")
    export_file = gr.File(label="Download ZIP")

    export_btn.click(
        fn=_export_rows,
        inputs=results_table,
        outputs=export_file,
    )

# Start the server only when executed as a script, so importing this module
# (for tests or tooling) does not launch it.
if __name__ == "__main__":
    app.launch()