Update src/streamlit_app.py

#1
by moddux - opened
Files changed (1) hide show
  1. src/streamlit_app.py +418 -40
src/streamlit_app.py CHANGED
@@ -1,40 +1,418 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
- import streamlit as st
5
-
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @@
2
+ -import streamlit as st
3
+ -import importlib.util
4
+ -import os
5
+ -import json
6
+ -import tempfile
7
+ -from pathlib import Path
8
+ -
9
+ -# --- EXISTING PIPELINE IMPORTS (keep these as in original app) ---
10
+ -from modules.ingestion.ingest_data import run as ingest_run
11
+ -from modules.preprocessing.preprocess_data import run as preprocess_run
12
+ -from modules.ml_analysis.ml_analysis import run as ml_run
13
+ -from modules.correlation.correlate_ioc import run as correlate_run
14
+ -from modules.export.export_results import run as export_run
15
+ -
16
+ -st.set_page_config(page_title="Modular OSINT Pipeline", layout="wide")
17
+ -st.title("Modular OSINT Pipeline Dashboard")
18
+ -
19
+ -# --- PIPELINE WORKFLOW, unchanged ---
20
+ -def write_temp(data: dict) -> str:
21
+ - f = tempfile.NamedTemporaryFile(delete=False, suffix=".json")
22
+ - f.write(json.dumps(data).encode())
23
+ - f.close()
24
+ - return f.name
25
+ -
26
+ -uploaded = st.file_uploader("Upload initial OSINTModuleInput JSON", type=["json"])
27
+ -if uploaded:
28
+ - init_input = json.load(uploaded)
29
+ - st.session_state["input"] = init_input
30
+ -
31
+ -if "input" in st.session_state:
32
+ - st.markdown("### Initial Input")
33
+ - st.json(st.session_state["input"])
34
+ -
35
+ - col1, col2 = st.columns(2)
36
+ -
37
+ - with col1:
38
+ - if st.button("Run Ingestion"):
39
+ - path = write_temp(st.session_state["input"])
40
+ - out = ingest_run(path)
41
+ - st.session_state["ingest"] = json.loads(out.json())
42
+ - if "ingest" in st.session_state:
43
+ - st.markdown("#### Ingestion Output")
44
+ - st.json(st.session_state["ingest"])
45
+ -
46
+ - with col1:
47
+ - if st.button("Run Preprocessing"):
48
+ - prev = st.session_state.get("ingest", st.session_state["input"])
49
+ - path = write_temp(prev)
50
+ - out = preprocess_run(path)
51
+ - st.session_state["preprocess"] = json.loads(out.json())
52
+ - if "preprocess" in st.session_state:
53
+ - st.markdown("#### Preprocessing Output")
54
+ - st.json(st.session_state["preprocess"])
55
+ -
56
+ - with col2:
57
+ - if st.button(" Run ML Analysis"):
58
+ - prev = st.session_state.get("preprocess", st.session_state.get("ingest"))
59
+ - path = write_temp(prev)
60
+ - out = ml_run(path)
61
+ - st.session_state["ml"] = json.loads(out.json())
62
+ - if "ml" in st.session_state:
63
+ - st.markdown("#### ML Analysis Output")
64
+ - st.json(st.session_state["ml"])
65
+ -
66
+ - with col2:
67
+ - if st.button("Run Correlation"):
68
+ - prev = st.session_state.get("ml", st.session_state.get("preprocess"))
69
+ - path = write_temp(prev)
70
+ - out = correlate_run(path)
71
+ - st.session_state["correlate"] = json.loads(out.json())
72
+ - if "correlate" in st.session_state:
73
+ - st.markdown("#### Correlation Output")
74
+ - st.json(st.session_state["correlate"])
75
+ -
76
+ - if st.button("Run Export"):
77
+ - prev = st.session_state.get("correlate", st.session_state.get("ml"))
78
+ - path = write_temp(prev)
79
+ - out = export_run(path)
80
+ - st.session_state["export"] = json.loads(out.json())
81
+ - if "export" in st.session_state:
82
+ - st.markdown("#### Export Output")
83
+ - st.json(st.session_state["export"])
84
+ -
85
+ - # Export/download option for pipeline output
86
+ - st.download_button(
87
+ - label="Download Exported Results",
88
+ - data=json.dumps(st.session_state["export"], indent=2),
89
+ - file_name="osint_export.json",
90
+ - mime="application/json"
91
+ - )
92
+ -
93
+ -# --- STANDALONE MODULE LAUNCHER SECTION ---
94
+ -st.sidebar.header("Standalone Modules")
95
+ -
96
+ -MODULES_DIR = Path("Modules")
97
+ -MODULE_REGISTRY = MODULES_DIR / "module_registry.json"
98
+ -
99
+ -def discover_py_modules(directory):
100
+ - """List .py scripts in the given directory (non-recursive, excludes __init__.py)."""
101
+ - return [
102
+ - f for f in os.listdir(directory)
103
+ - if f.endswith(".py") and f != "__init__.py"
104
+ - ]
105
+ -
106
+ -def load_module_description(module_name):
107
+ - """Get description from registry or fallback to module docstring."""
108
+ - # Registry lookup
109
+ - if MODULE_REGISTRY.exists():
110
+ - with open(MODULE_REGISTRY) as regfile:
111
+ - registry = json.load(regfile)
112
+ - if module_name in registry:
113
+ - return registry[module_name].get("description", "")
114
+ - # Fallback: docstring from module file
115
+ - module_path = MODULES_DIR / f"{module_name}.py"
116
+ - if module_path.exists():
117
+ - with open(module_path) as f:
118
+ - first_line = f.readline()
119
+ - if first_line.startswith("\"\"\"") or first_line.startswith("'''"):
120
+ - docstring = first_line.strip().strip("\"'") + " "
121
+ - while True:
122
+ - l = f.readline()
123
+ - if not l or l.startswith("\"\"\"") or l.startswith("'''"):
124
+ - break
125
+ - docstring += l.strip() + " "
126
+ - return docstring.strip()
127
+ - return ""
128
+ -
129
+ -def get_module_params(module_name):
130
+ - """Load osintmodule.json file if present for the module."""
131
+ - param_path = MODULES_DIR / f"{module_name}.osintmodule.json"
132
+ - if param_path.exists():
133
+ - with open(param_path, "r") as f:
134
+ - return json.load(f), str(param_path)
135
+ - return None, None
136
+ -
137
+ -def run_module(module_name, params=None):
138
+ - """Dynamically import and run the main() function of a module, passing params if supported."""
139
+ - module_path = MODULES_DIR / f"{module_name}.py"
140
+ - if not module_path.exists():
141
+ - st.error(f"Module {module_name} not found.")
142
+ - return
143
+ - spec = importlib.util.spec_from_file_location(module_name, str(module_path))
144
+ - mod = importlib.util.module_from_spec(spec)
145
+ - spec.loader.exec_module(mod)
146
+ - if hasattr(mod, "main"):
147
+ - # Try to pass params if main() supports it
148
+ - import inspect
149
+ - sig = inspect.signature(mod.main)
150
+ - if params and len(sig.parameters) > 0:
151
+ - mod.main(params)
152
+ - else:
153
+ - mod.main()
154
+ - else:
155
+ - st.warning(f"{module_name} does not have a main() function.")
156
+ -
157
+ -def save_params_json(params, param_path):
158
+ - # Overwrites the osintmodule.json file
159
+ - with open(param_path, "w") as f:
160
+ - json.dump(params, f, indent=2)
161
+ -
162
+ -py_modules = discover_py_modules(MODULES_DIR)
163
+ -for mod in py_modules:
164
+ - mod_name = mod[:-3]
165
+ - desc = load_module_description(mod_name)
166
+ - with st.sidebar.expander(mod_name, expanded=False):
167
+ - if desc:
168
+ - st.info(desc)
169
+ -
170
+ - # Parameter editor (if osintmodule.json exists)
171
+ - params, param_path = get_module_params(mod_name)
172
+ - param_input = None
173
+ - if params is not None:
174
+ - st.markdown("**Edit module parameters:**")
175
+ - param_str = st.text_area(
176
+ - "Parameters (JSON)",
177
+ - value=json.dumps(params, indent=2),
178
+ - key=f"params_{mod_name}",
179
+ - height=200
180
+ - )
181
+ - try:
182
+ - param_input = json.loads(param_str)
183
+ - st.success("Valid JSON")
184
+ - except Exception as e:
185
+ - st.error(f"Invalid JSON: {e}")
186
+ - param_input = None
187
+ -
188
+ - if st.button(f"Run {mod_name}", key=f"run_{mod_name}"):
189
+ - st.write(f"## Running: {mod_name}")
190
+ - if desc:
191
+ - st.info(desc)
192
+ - # Save edited params if changed
193
+ - if param_input is not None and param_path:
194
+ - save_params_json(param_input, param_path)
195
+ - params = param_input
196
+ - # Run the module (with params if possible)
197
+ - run_module(mod_name, params)
198
+ - # Look for output file to export (if your modules save output)
199
+ - output_files = list((MODULES_DIR / "Data" / "Results").glob(f"{mod_name}*.*"))
200
+ - if output_files:
201
+ - for f in output_files:
202
+ - with open(f, "rb") as fo:
203
+ - st.download_button(
204
+ - label=f"Download result: {f.name}",
205
+ - data=fo,
206
+ - file_name=f.name
207
+ - )
208
+ +import streamlit as st
209
+ +import importlib.util
210
+ +import os
211
+ +import json
212
+ +import tempfile
213
+ +from pathlib import Path
214
+ +import inspect
215
+ +
216
+ +# --- EXISTING PIPELINE IMPORTS (keep these as in original app) ---
217
+ +from modules.ingestion.ingest_data import run as ingest_run
218
+ +from modules.preprocessing.preprocess_data import run as preprocess_run
219
+ +from modules.ml_analysis.ml_analysis import run as ml_run
220
+ +from modules.correlation.correlate_ioc import run as correlate_run
221
+ +from modules.export.export_results import run as export_run
222
+ +
223
+ +st.set_page_config(page_title="Modular OSINT Pipeline", layout="wide")
224
+ +st.title("🚀 Modular OSINT Pipeline Dashboard")
225
+ +
226
+ +# --- PIPELINE WORKFLOW, unchanged ---
227
def write_temp(data: dict) -> str:
    """Serialize *data* to a new temporary ``.json`` file and return its path.

    The file is created with ``delete=False``, so it outlives this call and can
    be handed to a pipeline stage by path. Nothing in this function removes it
    afterwards.

    Args:
        data: Payload to write; anything ``json.dumps`` accepts.

    Returns:
        Filesystem path of the written file.

    Raises:
        TypeError: If *data* contains a value that is not JSON-serializable.
        ValueError: If *data* contains a circular reference.
    """
    # Serialize before creating the file: a failing json.dumps must not leave
    # an empty temp file (or an unclosed handle) behind.
    payload = json.dumps(data).encode()
    with tempfile.NamedTemporaryFile(delete=False, suffix=".json") as handle:
        handle.write(payload)
    return handle.name
232
+ +
233
uploaded = st.file_uploader("Upload initial OSINTModuleInput JSON", type=["json"])
if uploaded:
    try:
        # Keep the parsed upload in session state so it survives Streamlit reruns.
        st.session_state["input"] = json.load(uploaded)
    except ValueError as exc:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses; report the problem instead of crashing the whole page.
        st.error(f"Uploaded file is not valid JSON: {exc}")


def _render_stage(label, state_key, runner, upstream_keys, heading):
    """Render one pipeline stage: its run button and its stored output.

    Args:
        label: Text of the button that triggers the stage.
        state_key: ``st.session_state`` key under which the stage output is kept.
        runner: Stage entry point. It is called with the path of a temp JSON
            file and must return an object whose ``.json()`` yields a JSON string.
        upstream_keys: Session-state keys to try, in order, for the stage input;
            the first key that is present wins.
        heading: Markdown heading shown above the stored output.
    """
    if st.button(label):
        available = [key for key in upstream_keys if key in st.session_state]
        if not available:
            # Without this guard write_temp(None) would hand the stage a file
            # containing just "null".
            st.warning(f"{label}: no upstream output available yet.")
        else:
            out = runner(write_temp(st.session_state[available[0]]))
            st.session_state[state_key] = json.loads(out.json())
    if state_key in st.session_state:
        st.markdown(heading)
        st.json(st.session_state[state_key])


if "input" in st.session_state:
    st.markdown("### 🔍 Initial Input")
    st.json(st.session_state["input"])

    col1, col2 = st.columns(2)

    with col1:
        _render_stage(
            "Run Ingestion", "ingest", ingest_run,
            ("input",), "#### Ingestion Output",
        )
        # Label normalized: the original carried a stray leading space.
        _render_stage(
            "Run Preprocessing", "preprocess", preprocess_run,
            ("ingest", "input"), "#### Preprocessing Output",
        )

    with col2:
        _render_stage(
            "Run ML Analysis", "ml", ml_run,
            ("preprocess", "ingest"), "#### ML Analysis Output",
        )
        _render_stage(
            "Run Correlation", "correlate", correlate_run,
            ("ml", "preprocess"), "#### Correlation Output",
        )

    # Export is rendered below the two columns, at full width.
    _render_stage(
        "Run Export", "export", export_run,
        ("correlate", "ml"), "#### Export Output",
    )
    if "export" in st.session_state:
        st.download_button(
            label="Download Exported Results",
            data=json.dumps(st.session_state["export"], indent=2),
            file_name="osint_export.json",
            mime="application/json",
        )
297
+ +
298
+ +# --- MULTI-DIRECTORY MODULE LAUNCHER SECTION ---
299
+ +st.sidebar.header("Standalone & Subdirectory Modules")
300
+ +
301
+ +MODULES_DIR = Path("Modules")
302
+ +MODULE_REGISTRY = MODULES_DIR / "module_registry.json"
303
+ +
304
def discover_py_modules(directory):
    """Recursively list ``.py`` scripts below *directory*.

    ``__init__.py`` files are skipped. The result is sorted so that callers
    building UI from it get the same order on every run; ``os.walk`` alone
    yields entries in filesystem-dependent order.

    Args:
        directory: Root folder to scan (``str`` or ``Path``).

    Returns:
        Sorted list of ``Path`` objects relative to *directory*. The list is
        empty when *directory* does not exist, because ``os.walk`` then yields
        nothing.
    """
    base = Path(directory)
    found = []
    for root, _dirs, files in os.walk(base):
        rel_root = Path(root).relative_to(base)
        found.extend(
            rel_root / name
            for name in files
            if name.endswith(".py") and name != "__init__.py"
        )
    return sorted(found)
313
+ +
314
def load_module_description(module_path, modules_dir=None, registry_path=None):
    """Return a one-line description for a discovered module.

    Lookup order:
      1. The JSON registry, keyed by the dotted module name
         (``sub/mod.py`` -> ``"sub.mod"``); its ``"description"`` field is used.
      2. The module's own docstring, with all whitespace runs collapsed to
         single spaces.

    The docstring is extracted with ``ast`` rather than by scanning lines. The
    line scanner lost any text on the opening-quote line, and for a one-line
    docstring it kept reading the rest of the file as the description. Parsing
    does not execute the module.

    Args:
        module_path: Path of the module relative to the modules directory.
        modules_dir: Directory containing the modules; defaults to
            ``MODULES_DIR``.
        registry_path: Registry JSON file; defaults to ``MODULE_REGISTRY``, or
            to ``module_registry.json`` inside *modules_dir* when only that is
            given.

    Returns:
        The description, or ``""`` when none is found or the module file is
        missing or cannot be parsed.
    """
    import ast  # local import keeps this block self-contained

    if modules_dir is None:
        base_dir = MODULES_DIR
        registry_file = MODULE_REGISTRY if registry_path is None else Path(registry_path)
    else:
        base_dir = Path(modules_dir)
        registry_file = (
            base_dir / "module_registry.json"
            if registry_path is None
            else Path(registry_path)
        )

    module_path = Path(module_path)
    module_name = ".".join(module_path.with_suffix("").parts)

    # Registry lookup takes precedence over the docstring.
    if registry_file.exists():
        with open(registry_file, encoding="utf-8") as regfile:
            registry = json.load(regfile)
        if module_name in registry:
            return registry[module_name].get("description", "")

    # Fallback: the module-level docstring.
    source_file = base_dir / module_path
    if not source_file.exists():
        return ""
    try:
        tree = ast.parse(source_file.read_text(encoding="utf-8"))
    except (SyntaxError, ValueError):
        # Unparsable or undecodable source: there is no description to show.
        return ""
    doc = ast.get_docstring(tree)
    return " ".join(doc.split()) if doc else ""
338
+ +
339
def get_module_params(module_path, modules_dir=None):
    """Load the ``.osintmodule.json`` parameter file that sits next to a module.

    The parameter file has the module's name with its suffix replaced, in the
    same directory: ``sub/mod.py`` -> ``sub/mod.osintmodule.json``.

    Args:
        module_path: Path of the module relative to the modules directory.
        modules_dir: Directory containing the modules; defaults to
            ``MODULES_DIR``.

    Returns:
        ``(params, path)`` where *params* is the parsed JSON and *path* the
        parameter file location as a string, or ``(None, None)`` when the
        module has no parameter file.

    Raises:
        json.JSONDecodeError: If the parameter file is not valid JSON.
    """
    base_dir = MODULES_DIR if modules_dir is None else Path(modules_dir)
    param_path = (base_dir / module_path).with_suffix(".osintmodule.json")
    if not param_path.exists():
        return None, None
    # Explicit UTF-8: the platform default encoding can mis-decode non-ASCII
    # values in a hand-edited parameter file.
    with open(param_path, "r", encoding="utf-8") as f:
        return json.load(f), str(param_path)
346
+ +
347
def run_module(module_path, params=None, modules_dir=None):
    """Import a discovered module from its file and call its ``main()``.

    The file is executed in-process via ``importlib``, so the module's
    top-level code and ``main()`` run with the full privileges of this app.
    Only point the modules directory at trusted code.

    *params* is passed to ``main`` whenever it is not ``None`` and ``main``
    accepts a positional argument. An empty parameter set (``{}``) is still
    passed; the earlier truthiness test dropped it and called ``main()`` with
    no arguments.

    Args:
        module_path: Path of the module relative to the modules directory.
        params: Optional parameter object forwarded to ``main``.
        modules_dir: Directory containing the modules; defaults to
            ``MODULES_DIR``.

    Returns:
        None. Missing files, unloadable modules and modules without ``main``
        are reported through Streamlit messages. Exceptions raised by the
        module itself propagate.
    """
    base_dir = MODULES_DIR if modules_dir is None else Path(modules_dir)
    module_path = Path(module_path)
    full_path = base_dir / module_path
    if not full_path.exists():
        st.error(f"Module file not found: {module_path}")
        return

    # Build the synthetic module name from the path parts, so only the real
    # suffix is dropped; str.replace(".py", "") removed ".py" anywhere in the path.
    mod_name = "mod_" + "_".join(module_path.with_suffix("").parts)
    spec = importlib.util.spec_from_file_location(mod_name, str(full_path))
    if spec is None or spec.loader is None:
        st.error(f"Module file could not be loaded: {module_path}")
        return
    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)

    entry = getattr(mod, "main", None)
    if entry is None:
        st.warning(f"{module_path} does not have a main() function.")
        return

    positional_kinds = (
        inspect.Parameter.POSITIONAL_ONLY,
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.VAR_POSITIONAL,
    )
    # Keyword-only parameters cannot receive a positional params argument.
    takes_positional = any(
        p.kind in positional_kinds
        for p in inspect.signature(entry).parameters.values()
    )
    if params is not None and takes_positional:
        entry(params)
    else:
        entry()
368
+ +
369
def save_params_json(params, param_path):
    """Overwrite the parameter file at *param_path* with *params* as JSON.

    Args:
        params: JSON-serializable parameter object.
        param_path: Destination file; it is replaced if it exists.

    Raises:
        TypeError: If *params* is not JSON-serializable. The existing file is
            left untouched in that case.
    """
    # Serialize first: opening with "w" truncates the target, so a failure
    # while dumping straight into the file would destroy the saved parameters.
    serialized = json.dumps(params, indent=2)
    with open(param_path, "w", encoding="utf-8") as f:
        f.write(serialized)
372
+ +
373
# Build one sidebar expander per discovered module: description, optional
# JSON parameter editor, run button, and download buttons for result files.
py_modules = discover_py_modules(MODULES_DIR)
for rel_path in py_modules:
    mod_label = str(rel_path)  # the relative path is unique, so it is safe in widget keys
    mod_name = rel_path.stem
    desc = load_module_description(rel_path)
    with st.sidebar.expander(mod_label, expanded=False):
        if desc:
            st.info(desc)

        params, param_path = get_module_params(rel_path)
        param_input = None
        if params is not None:
            st.markdown("**Edit module parameters:**")
            param_str = st.text_area(
                "Parameters (JSON)",
                value=json.dumps(params, indent=2),
                key=f"params_{mod_label}",
                height=200,
            )
            try:
                param_input = json.loads(param_str)
            except json.JSONDecodeError as e:
                # Only a parse failure is expected here; param_input stays None.
                st.error(f"Invalid JSON: {e}")
            else:
                st.success("Valid JSON")

        if st.button(f"Run {mod_label}", key=f"run_{mod_label}"):
            st.write(f"## Running: {mod_label}")
            if desc:
                st.info(desc)
            # Persist the edited parameters before running. When the edit is
            # not valid JSON the module runs with the parameters from disk.
            if param_input is not None and param_path:
                save_params_json(param_input, param_path)
                params = param_input
            run_module(rel_path, params)

            # Offer result files from the module's own Results folder and from
            # the shared one. dict.fromkeys drops the duplicate when both are
            # the same folder (modules living directly under Data/).
            results_folders = dict.fromkeys(
                [
                    MODULES_DIR / rel_path.parent / "Results",
                    MODULES_DIR / "Data" / "Results",
                ]
            )
            for results_dir in results_folders:
                if not results_dir.exists():
                    continue
                # NOTE(review): this prefix glob also matches files written by
                # another module whose name starts with mod_name — confirm intended.
                for f in sorted(results_dir.glob(f"{mod_name}*.*")):
                    if not f.is_file():
                        continue
                    st.download_button(
                        label=f"Download result: {f.name}",
                        data=f.read_bytes(),
                        file_name=f.name,
                        # An explicit key avoids duplicate-widget errors when
                        # two buttons would otherwise have identical arguments.
                        key=f"download_{mod_label}_{f}",
                    )