NexusInstruments committed on
Commit
084a8af
·
verified ·
1 Parent(s): 6e55d38

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +133 -308
app.py CHANGED
@@ -1,330 +1,155 @@
1
- import os
2
- import json
3
- import hashlib
4
- import time
5
- from datetime import datetime
6
  import gradio as gr
7
- from huggingface_hub import InferenceClient
8
- from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
9
- from reportlab.lib.styles import getSampleStyleSheet
10
- from reportlab.lib.units import inch
11
-
12
- # ============================================================
13
- # CONFIGURATION
14
- # ============================================================
15
-
16
- OMNISCIENT_ROOT = "/opt/omniscient"
17
- CASES_ROOT = os.path.join(OMNISCIENT_ROOT, "cases")
18
- os.makedirs(CASES_ROOT, exist_ok=True)
19
-
20
- MODEL_OPTIONS = {
21
- "Zephyr 7B (Stable)": "HuggingFaceH4/zephyr-7b-beta",
22
- "Mixtral 8x7B": "mistralai/Mixtral-8x7B-Instruct-v0.1",
23
- }
24
-
25
- FALLBACK_MODEL = "HuggingFaceH4/zephyr-7b-beta"
26
-
27
- # ============================================================
28
- # UTILITIES
29
- # ============================================================
30
-
31
- def sha256_data(data):
32
- return hashlib.sha256(data.encode()).hexdigest()
33
-
34
- def sha256_file(path):
35
- h = hashlib.sha256()
36
- with open(path, "rb") as f:
37
- while chunk := f.read(8192):
38
- h.update(chunk)
39
- return h.hexdigest()
40
-
41
- def case_path(case_id):
42
- return os.path.join(CASES_ROOT, case_id)
43
-
44
- def manifest_path(case_id):
45
- return os.path.join(case_path(case_id), "manifest.json")
46
-
47
- def audit_path(case_id):
48
- return os.path.join(case_path(case_id), "audit_log.json")
49
-
50
- def ai_notes_path(case_id):
51
- return os.path.join(case_path(case_id), "ai_analysis.json")
52
-
53
- # ============================================================
54
- # CASE MANAGEMENT
55
- # ============================================================
56
-
57
- def create_case(case_id):
58
- path = case_path(case_id)
59
- os.makedirs(path, exist_ok=True)
60
- os.makedirs(os.path.join(path, "evidence"), exist_ok=True)
61
- return f"✅ Case created: {case_id}"
62
-
63
- def list_cases():
64
- return "\n".join(os.listdir(CASES_ROOT))
65
-
66
- # ============================================================
67
- # HASH-CHAINED MANIFEST
68
- # ============================================================
69
-
70
- def register_evidence(case_id, file, operator, notes):
71
- if not file:
72
- return "No file uploaded."
73
-
74
- path = case_path(case_id)
75
- evidence_dir = os.path.join(path, "evidence")
76
-
77
- if not os.path.exists(path):
78
- return "Case does not exist."
79
-
80
- dest = os.path.join(evidence_dir, os.path.basename(file.name))
81
- with open(file.name, "rb") as src, open(dest, "wb") as dst:
82
- dst.write(src.read())
83
-
84
- file_hash = sha256_file(dest)
85
-
86
- if os.path.exists(manifest_path(case_id)):
87
- with open(manifest_path(case_id), "r") as f:
88
- manifest = json.load(f)
89
- else:
90
- manifest = []
91
-
92
- prev_hash = manifest[-1]["entry_hash"] if manifest else "GENESIS"
93
-
94
- entry = {
95
- "file": dest,
96
- "sha256": file_hash,
97
- "operator": operator,
98
- "notes": notes,
99
- "timestamp": datetime.utcnow().isoformat(),
100
- "previous_hash": prev_hash
101
  }
 
102
 
103
- entry_hash = sha256_data(json.dumps(entry))
104
- entry["entry_hash"] = entry_hash
105
-
106
- manifest.append(entry)
107
-
108
- with open(manifest_path(case_id), "w") as f:
109
- json.dump(manifest, f, indent=2)
110
-
111
- append_audit_log(case_id, "EVIDENCE_REGISTERED", entry)
112
-
113
- return f"Evidence registered.\nSHA256: {file_hash}"
114
-
115
- # ============================================================
116
- # HASH-CHAINED AUDIT LOG
117
- # ============================================================
118
 
119
- def append_audit_log(case_id, action, details):
120
- if os.path.exists(audit_path(case_id)):
121
- with open(audit_path(case_id), "r") as f:
122
- log = json.load(f)
123
- else:
124
- log = []
125
-
126
- prev_hash = log[-1]["entry_hash"] if log else "GENESIS"
127
-
128
- entry = {
129
- "timestamp": datetime.utcnow().isoformat(),
130
- "action": action,
131
- "details": details,
132
- "previous_hash": prev_hash
133
- }
134
-
135
- entry_hash = sha256_data(json.dumps(entry))
136
- entry["entry_hash"] = entry_hash
137
 
138
- log.append(entry)
 
 
 
139
 
140
- with open(audit_path(case_id), "w") as f:
141
- json.dump(log, f, indent=2)
 
142
 
143
- # ============================================================
144
- # VERIFY INTEGRITY
145
- # ============================================================
146
 
147
- def verify_integrity(case_id):
148
- # Verify Manifest Chain
149
- if os.path.exists(manifest_path(case_id)):
150
- with open(manifest_path(case_id), "r") as f:
151
- manifest = json.load(f)
152
 
153
- for i in range(1, len(manifest)):
154
- if manifest[i]["previous_hash"] != manifest[i-1]["entry_hash"]:
155
- return "Manifest chain broken."
156
 
157
- # Verify Audit Chain
158
- if os.path.exists(audit_path(case_id)):
159
- with open(audit_path(case_id), "r") as f:
160
- log = json.load(f)
161
 
162
- for i in range(1, len(log)):
163
- if log[i]["previous_hash"] != log[i-1]["entry_hash"]:
164
- return "Audit chain broken."
165
 
166
- return "Integrity verified."
 
167
 
168
- # ============================================================
169
- # AI AUGMENTATION (CASE-BOUND)
170
- # ============================================================
171
 
172
- def get_client(model_id):
173
- token = os.environ.get("HF_TOKEN")
174
- return InferenceClient(model=model_id, token=token)
175
 
176
- def run_ai(model_id, messages):
177
- try:
178
- client = get_client(model_id)
179
- response = client.chat_completion(
180
- messages=messages,
181
- max_tokens=1000,
182
- temperature=0.4,
183
- top_p=0.95,
184
- )
185
- return response.choices[0].message.content
186
- except:
187
- fallback = get_client(FALLBACK_MODEL)
188
- response = fallback.chat_completion(
189
- messages=messages,
190
- max_tokens=1000,
191
- temperature=0.4,
192
- top_p=0.95,
193
- )
194
- return "Fallback model used.\n\n" + response.choices[0].message.content
195
-
196
- def ai_analysis(case_id, input_text, model_label):
197
- system_prompt = """
198
- You are an AI augmentation module for digital forensics.
199
- Structure output with clear analytical sections.
200
- AI does not modify evidence.
201
- """
202
 
203
- messages = [
204
- {"role": "system", "content": system_prompt},
205
- {"role": "user", "content": input_text}
206
- ]
207
 
208
- result = run_ai(MODEL_OPTIONS[model_label], messages)
209
 
210
- # Store AI notes per case
211
- notes_path = ai_notes_path(case_id)
212
- if os.path.exists(notes_path):
213
- with open(notes_path, "r") as f:
214
- notes = json.load(f)
215
- else:
216
- notes = []
217
 
218
- entry = {
219
- "timestamp": datetime.utcnow().isoformat(),
220
- "analysis": result
221
- }
222
 
223
- notes.append(entry)
 
 
 
 
224
 
225
- with open(notes_path, "w") as f:
226
- json.dump(notes, f, indent=2)
 
 
 
227
 
228
- append_audit_log(case_id, "AI_ANALYSIS", {"length": len(input_text)})
229
-
230
- return result
231
-
232
- # ============================================================
233
- # CASE TIMELINE
234
- # ============================================================
235
-
236
- def case_timeline(case_id):
237
- events = []
238
-
239
- if os.path.exists(audit_path(case_id)):
240
- with open(audit_path(case_id), "r") as f:
241
- log = json.load(f)
242
- for entry in log:
243
- events.append(f"{entry['timestamp']} - {entry['action']}")
244
-
245
- return "\n".join(sorted(events))
246
-
247
- # ============================================================
248
- # PDF EXPORT
249
- # ============================================================
250
-
251
- def export_case_pdf(case_id):
252
- path = case_path(case_id)
253
- pdf_path = os.path.join(path, f"{case_id}_report.pdf")
254
-
255
- doc = SimpleDocTemplate(pdf_path)
256
- elements = []
257
- styles = getSampleStyleSheet()
258
-
259
- elements.append(Paragraph(f"Case Report: {case_id}", styles["Heading1"]))
260
- elements.append(Spacer(1, 0.3 * inch))
261
-
262
- for file_name in ["manifest.json", "audit_log.json", "ai_analysis.json"]:
263
- file_path = os.path.join(path, file_name)
264
- if os.path.exists(file_path):
265
- elements.append(Paragraph(file_name, styles["Heading2"]))
266
- with open(file_path, "r") as f:
267
- data = json.load(f)
268
- elements.append(Paragraph(str(data), styles["Normal"]))
269
- elements.append(Spacer(1, 0.3 * inch))
270
-
271
- doc.build(elements)
272
- return pdf_path
273
-
274
- # ============================================================
275
- # UI
276
- # ============================================================
277
-
278
- with gr.Blocks(title="Omniscient Investigative Infrastructure vNext") as demo:
279
-
280
- gr.Markdown("## Omniscient – Advanced Case-Based Investigative Platform")
281
-
282
- case_id = gr.Textbox(label="Case ID")
283
-
284
- with gr.Tabs():
285
-
286
- with gr.Tab("Case Management"):
287
- create_btn = gr.Button("Create Case")
288
- list_btn = gr.Button("List Cases")
289
- case_output = gr.Textbox(lines=6)
290
-
291
- with gr.Tab("Evidence"):
292
- operator = gr.Textbox(label="Operator Name")
293
- notes = gr.Textbox(label="Evidence Notes")
294
- file_upload = gr.File(label="Upload Evidence")
295
- register_btn = gr.Button("Register Evidence")
296
- evidence_output = gr.Textbox(lines=10)
297
-
298
- with gr.Tab("AI Augmentation"):
299
- ai_input = gr.Textbox(lines=8)
300
- model_selector = gr.Dropdown(
301
- choices=list(MODEL_OPTIONS.keys()),
302
- value=list(MODEL_OPTIONS.keys())[0]
303
- )
304
- ai_btn = gr.Button("Run AI Analysis")
305
- ai_output = gr.Textbox(lines=20)
306
-
307
- with gr.Tab("Integrity"):
308
- verify_btn = gr.Button("Verify Full Integrity")
309
- timeline_btn = gr.Button("View Case Timeline")
310
- integrity_output = gr.Textbox(lines=15)
311
-
312
- with gr.Tab("Export"):
313
- export_btn = gr.Button("Export Case PDF")
314
- export_file = gr.File()
315
-
316
- create_btn.click(create_case, inputs=case_id, outputs=case_output)
317
- list_btn.click(list_cases, outputs=case_output)
318
- register_btn.click(register_evidence,
319
- inputs=[case_id, file_upload, operator, notes],
320
- outputs=evidence_output)
321
- ai_btn.click(ai_analysis,
322
- inputs=[case_id, ai_input, model_selector],
323
- outputs=ai_output)
324
- verify_btn.click(verify_integrity, inputs=case_id, outputs=integrity_output)
325
- timeline_btn.click(case_timeline, inputs=case_id, outputs=integrity_output)
326
- export_btn.click(export_case_pdf, inputs=case_id, outputs=export_file)
327
-
328
- if __name__ == "__main__":
329
- demo.queue()
330
- demo.launch()
 
 
 
 
 
 
1
  import gradio as gr
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ from duckduckgo_search import DDGS
5
+ from sentence_transformers import SentenceTransformer
6
+ from transformers import pipeline
7
+ import numpy as np
8
+ import re
9
+ from sklearn.metrics.pairwise import cosine_similarity
10
+
11
+ # -------------------------
12
+ # Load Models (HF Safe)
13
+ # -------------------------
14
+
15
# Sentence-embedding model used to vectorise both stored documents and
# incoming queries for the in-memory RAG store. Instantiated once, at import.
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Text-generation pipeline shared by entity refinement and chat. Each call
# generates at most 512 new tokens.
llm = pipeline(
    "text-generation",
    model="HuggingFaceH4/zephyr-7b-beta",
    max_new_tokens=512,
)
22
+
23
+ # -------------------------
24
+ # Simple In-Memory Vector Store
25
+ # -------------------------
26
+
27
# Parallel lists: embeddings[i] is the vector computed for documents[i].
# Both live only in process memory, so nothing persists across restarts.
documents = []
embeddings = []
29
+
30
def add_to_rag(text):
    """Embed ``text`` and append it to the in-memory vector store.

    The embedding is computed before either list is modified, so a failure
    inside the encoder cannot leave ``documents`` and ``embeddings`` with
    different lengths (retrieval indexes one list with positions taken from
    the other). Empty or whitespace-only text is ignored because it carries
    nothing worth retrieving.

    Args:
        text: Raw text to index.

    Returns:
        None.
    """
    if not text or not text.strip():
        return

    vector = embedder.encode(text)

    # No ``global`` statement is needed: the lists are mutated, not rebound.
    documents.append(text)
    embeddings.append(vector)
34
+
35
def retrieve_from_rag(query, top_k=3):
    """Return the stored documents most similar to ``query``.

    Args:
        query: Text to embed and compare against the stored documents.
        top_k: Maximum number of documents to return. Values <= 0 yield an
            empty list.

    Returns:
        Up to ``top_k`` documents ordered from most to least similar
        (cosine similarity of the embeddings). Empty when the store is empty.
    """
    # A slice such as ``[-0:]`` selects *every* element, so a non-positive
    # ``top_k`` must be rejected explicitly instead of being sliced with.
    if top_k <= 0 or not embeddings:
        return []

    query_emb = embedder.encode(query)
    sims = cosine_similarity([query_emb], embeddings)[0]

    # argsort is ascending: reverse for best-first, then keep the first k.
    ranked = np.argsort(sims)[::-1][:top_k]
    return [documents[i] for i in ranked]
42
+
43
+ # -------------------------
44
+ # URL Scraper
45
+ # -------------------------
46
+
47
def scrape_url(url):
    """Fetch ``url``, extract its visible text and add it to the RAG store.

    Args:
        url: Address of the page to fetch.

    Returns:
        ``"Scraped and added to RAG."`` when text was fetched and indexed,
        ``"Scrape failed."`` for a blank URL, a network/HTTP error, a page
        with no text, or any failure while parsing or indexing.
    """
    if not url or not url.strip():
        return "Scrape failed."

    try:
        r = requests.get(url.strip(), timeout=10)
        # Without this check a 4xx/5xx error page would be indexed as if it
        # were the requested content.
        r.raise_for_status()
        soup = BeautifulSoup(r.text, "html.parser")
        # Collapse the runs of whitespace that HTML layout leaves behind.
        text = " ".join(soup.get_text(separator=" ").split())
        if not text:
            return "Scrape failed."
        add_to_rag(text)
    except Exception:
        # Deliberately best-effort: the UI only shows a status string. Unlike
        # a bare ``except:``, this lets KeyboardInterrupt/SystemExit through.
        return "Scrape failed."

    return "Scraped and added to RAG."
56
+
57
+ # -------------------------
58
+ # DuckDuckGo Search
59
+ # -------------------------
60
+
61
def ddg_search(query):
    """Search DuckDuckGo and return the result snippets as one string.

    The combined snippets are also added to the RAG store, unless the search
    produced no text at all.

    Args:
        query: Search terms. A blank query returns ``""`` without searching.

    Returns:
        The ``body`` snippets of up to five results joined by newlines, or
        ``""`` when there is nothing to return. Errors raised by the search
        client are not caught here.
    """
    if not query or not query.strip():
        return ""

    with DDGS() as ddgs:
        # Skip results with a missing or empty snippet instead of raising
        # KeyError on them.
        snippets = [
            hit["body"]
            for hit in ddgs.text(query, max_results=5)
            if hit.get("body")
        ]

    combined = "\n".join(snippets)
    if combined:
        add_to_rag(combined)
    return combined
69
+
70
+ # -------------------------
71
+ # Hybrid Entity Extraction
72
+ # -------------------------
73
+
74
# Compiled once at import; the patterns themselves are unchanged.
_EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")
_PHONE_RE = re.compile(r"\+?\d[\d -]{8,}\d")
_URL_RE = re.compile(r"https?://\S+")
_IP_RE = re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b")

# Sentence punctuation that ``\S+`` swallows when a URL ends a clause.
_URL_TRAILING_PUNCT = ".,;:!?\"'"


def regex_entities(text):
    """Extract pattern-based entities from ``text``.

    Args:
        text: Text to scan. ``None`` is treated as an empty string.

    Returns:
        A dict with the keys ``"emails"``, ``"phones"``, ``"urls"`` and
        ``"ips"``, each mapping to a list of matches in order of appearance.
        URLs have trailing sentence punctuation removed, and dotted quads
        with an octet above 255 are not reported as IP addresses.
    """
    text = text or ""

    urls = [url.rstrip(_URL_TRAILING_PUNCT) for url in _URL_RE.findall(text)]

    # The pattern alone accepts impossible addresses such as 999.1.1.1.
    ips = [
        candidate
        for candidate in _IP_RE.findall(text)
        if all(int(octet) <= 255 for octet in candidate.split("."))
    ]

    entities = {
        "emails": _EMAIL_RE.findall(text),
        "phones": _PHONE_RE.findall(text),
        "urls": urls,
        "ips": ips,
    }
    return entities
82
 
83
def llm_refine_entities(text):
    """Ask the language model to extract OSINT entities from ``text``.

    Args:
        text: Text to analyse; it is embedded verbatim in the prompt.

    Returns:
        The model's completion as a string. The prompt asks for JSON, but the
        output is returned as-is and is not parsed or validated here.
    """
    prompt = (
        "\n"
        "Extract structured OSINT entities from this text.\n"
        "Return JSON with:\n"
        "people, organizations, locations, vehicles, usernames.\n"
        "\n"
        "TEXT:\n"
        f"{text}\n"
    )
    # By default the pipeline returns prompt + completion; asking for the
    # completion only keeps the instructions and input out of the result.
    output = llm(prompt, return_full_text=False)[0]["generated_text"]
    return output.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
def hybrid_extract(text):
    """Run regex and LLM entity extraction and format both as one report.

    The regex pass sees the full text; the LLM pass only receives the first
    2000 characters.

    Args:
        text: Text to analyse.

    Returns:
        A string containing the regex result dict followed by the raw LLM
        output, each under its own heading.
    """
    pattern_hits = regex_entities(text)

    llm_input = text[:2000]
    model_summary = llm_refine_entities(llm_input)

    report = f"Regex Extracted:\n{pattern_hits}\n\nLLM Refined:\n{model_summary}"
    return report
99
 
100
+ # -------------------------
101
+ # Chat Logic
102
+ # -------------------------
103
 
104
def chat(query, use_web, use_rag):
    """Answer ``query`` with the LLM, optionally grounded in extra context.

    Args:
        query: The user's question.
        use_web: When true, run a DuckDuckGo search and use the snippets.
        use_rag: When true, add the most similar documents from the store.

    Returns:
        The model's answer, without the prompt echoed back.
    """
    sections = []

    if use_web:
        web_context = ddg_search(query)
        if web_context:
            sections.append(web_context)

    if use_rag:
        for doc in retrieve_from_rag(query):
            # ddg_search also indexes its result, so the text fetched a
            # moment ago can come straight back from the store; keep one copy.
            if doc not in sections:
                sections.append(doc)

    # A blank line between sources stops the last web snippet from running
    # into the first retrieved document.
    context = "\n\n".join(sections)

    final_prompt = (
        "\n"
        "Use the following context to answer intelligently:\n"
        "\n"
        f"{context}\n"
        "\n"
        f"Question: {query}\n"
    )

    # return_full_text=False: return only the completion, not prompt + answer.
    response = llm(final_prompt, return_full_text=False)[0]["generated_text"]
    return response.strip()
 
124
 
125
+ # -------------------------
126
+ # Gradio UI
127
+ # -------------------------
128
 
129
# Single-page UI with three independent panels: question answering, URL
# ingestion into the RAG store, and entity extraction.
with gr.Blocks() as demo:
    gr.Markdown("# 🔎 Hybrid OSINT AI Assistant")

    # NOTE(review): assumes the Row holds exactly these three inputs; confirm
    # the nesting against the intended layout.
    with gr.Row():
        query = gr.Textbox(label="Ask Question")
        use_web = gr.Checkbox(label="Use DuckDuckGo Search")
        use_rag = gr.Checkbox(label="Use RAG")

    chat_btn = gr.Button("Run")

    output = gr.Textbox(label="Response")

    # The two checkboxes are passed to chat() as its use_web / use_rag flags.
    chat_btn.click(chat, inputs=[query, use_web, use_rag], outputs=output)

    gr.Markdown("## 🌐 URL → RAG")
    url_input = gr.Textbox(label="Enter URL")
    scrape_btn = gr.Button("Scrape")
    scrape_output = gr.Textbox()
    # scrape_url returns a status string, shown in scrape_output.
    scrape_btn.click(scrape_url, inputs=url_input, outputs=scrape_output)

    gr.Markdown("## 🧩 OSINT Entity Extraction")
    extract_input = gr.Textbox(label="Paste Text")
    extract_btn = gr.Button("Extract Entities")
    extract_output = gr.Textbox()
    extract_btn.click(hybrid_extract, inputs=extract_input, outputs=extract_output)

# Runs whenever this module is executed or imported (no __main__ guard).
demo.launch()