danielostrow commited on
Commit
2e467f6
·
verified ·
1 Parent(s): 4ddf4fa

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +290 -87
  3. requirements.txt +1 -0
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🛡️
4
  colorFrom: red
5
  colorTo: gray
6
  sdk: gradio
7
- sdk_version: 5.0.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
 
4
  colorFrom: red
5
  colorTo: gray
6
  sdk: gradio
7
+ sdk_version: 4.44.1
8
  app_file: app.py
9
  pinned: false
10
  license: mit
app.py CHANGED
@@ -7,19 +7,13 @@ Interactive demo for testing C2 beacon detection.
7
  import gradio as gr
8
  import json
9
  from huggingface_hub import hf_hub_download
10
- import sys
11
- import os
12
 
13
- # Download model files from the model repo
14
  model_dir = "."
15
- try:
16
- hf_hub_download(repo_id="danielostrow/c2sentinel", filename="c2sentinel.py", local_dir=model_dir)
17
- hf_hub_download(repo_id="danielostrow/c2sentinel", filename="c2_sentinel.safetensors", local_dir=model_dir)
18
- hf_hub_download(repo_id="danielostrow/c2sentinel", filename="c2_sentinel.json", local_dir=model_dir)
19
- except Exception as e:
20
- print(f"Error downloading model files: {e}")
21
-
22
- # Import the model
23
  from c2sentinel import C2Sentinel
24
 
25
  # Load model
@@ -70,69 +64,191 @@ EXAMPLES = {
70
  {"timestamp": 1705600900, "dst_ip": "203.0.113.50", "dst_port": 8080, "bytes_sent": 256, "bytes_recv": 512},
71
  {"timestamp": 1705601200, "dst_ip": "203.0.113.50", "dst_port": 8080, "bytes_sent": 256, "bytes_recv": 512},
72
  ], indent=2),
 
 
 
 
 
 
 
 
 
73
  }
74
 
75
 
76
- def analyze_connections(connection_json: str, threshold: float, strict_mode: bool) -> tuple:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  """Analyze connection data and return results."""
78
  try:
79
- connections = json.loads(connection_json)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  if not isinstance(connections, list):
81
- return "Error: Input must be a JSON array of connection objects", "", ""
82
 
83
  if len(connections) < 3:
84
- return "Error: Need at least 3 connections for analysis", "", ""
85
 
86
  # Run analysis
87
  result = sentinel.analyze(connections, threshold=threshold, strict_mode=strict_mode)
88
 
89
  # Format primary result
90
  if result.is_c2:
91
- verdict = f"C2 DETECTED: {result.c2_type}"
92
- verdict_color = "red"
93
  else:
94
- verdict = "No C2 Detected"
95
- verdict_color = "green"
96
 
97
- primary = f"""## Verdict: {verdict}
98
 
99
- **Probability:** {result.c2_probability:.1%}
100
- **Confidence:** {result.confidence:.1%}
101
- **Detection Method:** {result.detection_method}
 
 
 
102
  """
103
 
104
  if result.matched_legitimate_pattern:
105
- primary += f"**Matched Pattern:** {result.matched_legitimate_pattern}\n"
106
  if result.service_type:
107
- primary += f"**Service Type:** {result.service_type}\n"
108
  if result.immediate_detection:
109
- primary += "**Immediate Detection:** Yes (signature match)\n"
110
 
111
  # Format risk factors
112
  risk_text = ""
113
  if result.risk_factors:
114
- risk_text = "### Risk Factors\n"
115
  for factor in result.risk_factors:
116
  risk_text += f"- {factor}\n"
117
 
118
  if result.mitigating_factors:
119
- risk_text += "\n### Mitigating Factors\n"
120
  for factor in result.mitigating_factors:
121
  risk_text += f"- {factor}\n"
122
 
123
  # Format recommendations
124
  rec_text = ""
125
  if result.recommendations:
126
- rec_text = "### Recommendations\n"
127
  for rec in result.recommendations:
128
  rec_text += f"- {rec}\n"
129
 
130
- return primary, risk_text, rec_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
  except json.JSONDecodeError as e:
133
- return f"Error: Invalid JSON - {str(e)}", "", ""
134
  except Exception as e:
135
- return f"Error: {str(e)}", "", ""
136
 
137
 
138
  def load_example(example_name: str) -> str:
@@ -141,67 +257,140 @@ def load_example(example_name: str) -> str:
141
 
142
 
143
  # Build the interface
144
- with gr.Blocks(title="C2Sentinel Demo", theme=gr.themes.Soft()) as demo:
145
  gr.Markdown("""
146
- # C2Sentinel
147
 
148
- **Command and Control Beacon Detection**
149
 
150
- Analyze network connection patterns to detect C2 beacon activity.
151
- The model uses behavioral analysis to identify C2 communications on any port.
152
 
153
- [Model Repository](https://huggingface.co/danielostrow/c2sentinel) | [Documentation](https://huggingface.co/danielostrow/c2sentinel/blob/main/API_REFERENCE.md)
154
  """)
155
 
156
- with gr.Row():
157
- with gr.Column(scale=2):
158
- example_dropdown = gr.Dropdown(
159
- choices=list(EXAMPLES.keys()),
160
- label="Load Example",
161
- value=None
162
- )
163
-
164
- connection_input = gr.Textbox(
165
- label="Connection Data (JSON)",
166
- placeholder='[\n {"timestamp": 1000000, "dst_ip": "10.0.0.1", "dst_port": 443, "bytes_sent": 200, "bytes_recv": 500},\n ...\n]',
167
- lines=15
168
- )
169
-
170
  with gr.Row():
171
- threshold = gr.Slider(
172
- minimum=0.1,
173
- maximum=0.9,
174
- value=0.5,
175
- step=0.1,
176
- label="Detection Threshold"
177
- )
178
- strict_mode = gr.Checkbox(
179
- label="Strict Mode (min 0.7 threshold)",
180
- value=False
181
- )
182
-
183
- analyze_btn = gr.Button("Analyze", variant="primary")
184
-
185
- with gr.Column(scale=2):
186
- result_primary = gr.Markdown(label="Analysis Result")
187
- result_risks = gr.Markdown(label="Risk Analysis")
188
- result_recommendations = gr.Markdown(label="Recommendations")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
 
190
- gr.Markdown("""
191
- ---
192
- ### Connection Record Format
193
-
194
- | Field | Type | Required | Description |
195
- |-------|------|----------|-------------|
196
- | `timestamp` | float | Yes | Unix timestamp |
197
- | `dst_ip` | str | Yes | Destination IP |
198
- | `dst_port` | int | Yes | Destination port |
199
- | `bytes_sent` | int | Yes | Bytes sent |
200
- | `bytes_recv` | int | Yes | Bytes received |
201
-
202
- ---
203
- **Author:** Daniel Ostrow | [neuralintellect.com](https://neuralintellect.com)
204
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
  # Event handlers
207
  example_dropdown.change(
@@ -212,10 +401,24 @@ with gr.Blocks(title="C2Sentinel Demo", theme=gr.themes.Soft()) as demo:
212
 
213
  analyze_btn.click(
214
  fn=analyze_connections,
215
- inputs=[connection_input, threshold, strict_mode],
216
- outputs=[result_primary, result_risks, result_recommendations]
 
 
 
 
 
 
 
 
 
217
  )
218
 
 
 
 
 
 
219
 
220
  if __name__ == "__main__":
221
  demo.launch()
 
import gradio as gr
import json
from huggingface_hub import hf_hub_download

# Download model files
# Fetch the model code and weights from the model repo at startup so the
# Space always runs the latest published version.
# NOTE(review): failures here are unhandled and will crash the Space at
# import time — confirm a loud failure is the intended behavior.
model_dir = "."
hf_hub_download(repo_id="danielostrow/c2sentinel", filename="c2sentinel.py", local_dir=model_dir)
hf_hub_download(repo_id="danielostrow/c2sentinel", filename="c2_sentinel.safetensors", local_dir=model_dir)
hf_hub_download(repo_id="danielostrow/c2sentinel", filename="c2_sentinel.json", local_dir=model_dir)

# Import the model class from the just-downloaded c2sentinel.py;
# this import must come after the download calls above.
from c2sentinel import C2Sentinel
18
 
19
  # Load model
 
64
  {"timestamp": 1705600900, "dst_ip": "203.0.113.50", "dst_port": 8080, "bytes_sent": 256, "bytes_recv": 512},
65
  {"timestamp": 1705601200, "dst_ip": "203.0.113.50", "dst_port": 8080, "bytes_sent": 256, "bytes_recv": 512},
66
  ], indent=2),
67
+
68
+ "DNS Tunnel C2": json.dumps([
69
+ {"timestamp": 1705600000, "dst_ip": "198.51.100.53", "dst_port": 53, "bytes_sent": 64, "bytes_recv": 512},
70
+ {"timestamp": 1705600005, "dst_ip": "198.51.100.53", "dst_port": 53, "bytes_sent": 64, "bytes_recv": 512},
71
+ {"timestamp": 1705600010, "dst_ip": "198.51.100.53", "dst_port": 53, "bytes_sent": 64, "bytes_recv": 512},
72
+ {"timestamp": 1705600015, "dst_ip": "198.51.100.53", "dst_port": 53, "bytes_sent": 64, "bytes_recv": 512},
73
+ {"timestamp": 1705600020, "dst_ip": "198.51.100.53", "dst_port": 53, "bytes_sent": 64, "bytes_recv": 512},
74
+ {"timestamp": 1705600025, "dst_ip": "198.51.100.53", "dst_port": 53, "bytes_sent": 64, "bytes_recv": 512},
75
+ ], indent=2),
76
  }
77
 
78
 
79
+ def parse_log_file(file_content: str) -> list:
80
+ """Parse various log file formats into connection records."""
81
+ connections = []
82
+ lines = file_content.strip().split('\n')
83
+
84
+ for line in lines:
85
+ line = line.strip()
86
+ if not line or line.startswith('#'):
87
+ continue
88
+
89
+ # Try JSON format
90
+ try:
91
+ record = json.loads(line)
92
+ if 'dst_ip' in record or 'id.resp_h' in record:
93
+ conn = {
94
+ 'timestamp': record.get('timestamp', record.get('ts', 0)),
95
+ 'dst_ip': record.get('dst_ip', record.get('id.resp_h', '')),
96
+ 'dst_port': int(record.get('dst_port', record.get('id.resp_p', 0))),
97
+ 'bytes_sent': int(record.get('bytes_sent', record.get('orig_bytes', 0) or 0)),
98
+ 'bytes_recv': int(record.get('bytes_recv', record.get('resp_bytes', 0) or 0)),
99
+ }
100
+ if conn['dst_ip']:
101
+ connections.append(conn)
102
+ continue
103
+ except (json.JSONDecodeError, ValueError):
104
+ pass
105
+
106
+ # Try Zeek tab-separated format
107
+ parts = line.split('\t')
108
+ if len(parts) >= 10:
109
+ try:
110
+ conn = {
111
+ 'timestamp': float(parts[0]),
112
+ 'dst_ip': parts[4],
113
+ 'dst_port': int(parts[5]),
114
+ 'bytes_sent': int(parts[9] if parts[9] != '-' else 0),
115
+ 'bytes_recv': int(parts[10] if len(parts) > 10 and parts[10] != '-' else 0),
116
+ }
117
+ connections.append(conn)
118
+ continue
119
+ except (ValueError, IndexError):
120
+ pass
121
+
122
+ return connections
123
+
124
+
125
+ def analyze_connections(
126
+ connection_json: str,
127
+ uploaded_file,
128
+ threshold: float,
129
+ strict_mode: bool,
130
+ whitelist_ips: str,
131
+ whitelist_domains: str,
132
+ blacklist_ips: str,
133
+ blacklist_domains: str
134
+ ) -> tuple:
135
  """Analyze connection data and return results."""
136
  try:
137
+ # Reset whitelist/blacklist for this analysis
138
+ sentinel.whitelist_ips = set()
139
+ sentinel.whitelist_domains = set()
140
+ sentinel.blacklist_ips = set()
141
+ sentinel.blacklist_domains = set()
142
+
143
+ # Apply whitelist
144
+ if whitelist_ips.strip():
145
+ ips = [ip.strip() for ip in whitelist_ips.split(',') if ip.strip()]
146
+ sentinel.add_whitelist(ips=ips)
147
+ if whitelist_domains.strip():
148
+ domains = [d.strip() for d in whitelist_domains.split(',') if d.strip()]
149
+ sentinel.add_whitelist(domains=domains)
150
+
151
+ # Apply blacklist
152
+ if blacklist_ips.strip():
153
+ ips = [ip.strip() for ip in blacklist_ips.split(',') if ip.strip()]
154
+ sentinel.add_blacklist(ips=ips)
155
+ if blacklist_domains.strip():
156
+ domains = [d.strip() for d in blacklist_domains.split(',') if d.strip()]
157
+ sentinel.add_blacklist(domains=domains)
158
+
159
+ # Get connections from file upload or text input
160
+ connections = []
161
+
162
+ if uploaded_file is not None:
163
+ file_content = uploaded_file.decode('utf-8') if isinstance(uploaded_file, bytes) else open(uploaded_file, 'r').read()
164
+ connections = parse_log_file(file_content)
165
+ if not connections:
166
+ # Try as JSON array
167
+ try:
168
+ connections = json.loads(file_content)
169
+ except:
170
+ pass
171
+
172
+ if not connections and connection_json.strip():
173
+ connections = json.loads(connection_json)
174
+
175
  if not isinstance(connections, list):
176
+ return "Error: Input must be a JSON array of connection objects", "", "", ""
177
 
178
  if len(connections) < 3:
179
+ return "Error: Need at least 3 connections for analysis", "", "", ""
180
 
181
  # Run analysis
182
  result = sentinel.analyze(connections, threshold=threshold, strict_mode=strict_mode)
183
 
184
  # Format primary result
185
  if result.is_c2:
186
+ verdict = f"**C2 DETECTED:** {result.c2_type}"
 
187
  else:
188
+ verdict = "**No C2 Detected**"
 
189
 
190
+ primary = f"""## {verdict}
191
 
192
+ | Metric | Value |
193
+ |--------|-------|
194
+ | Probability | {result.c2_probability:.1%} |
195
+ | Confidence | {result.confidence:.1%} |
196
+ | Detection Method | {result.detection_method} |
197
+ | Connections Analyzed | {len(connections)} |
198
  """
199
 
200
  if result.matched_legitimate_pattern:
201
+ primary += f"| Matched Pattern | {result.matched_legitimate_pattern} |\n"
202
  if result.service_type:
203
+ primary += f"| Service Type | {result.service_type} |\n"
204
  if result.immediate_detection:
205
+ primary += "| Immediate Detection | Yes (signature match) |\n"
206
 
207
  # Format risk factors
208
  risk_text = ""
209
  if result.risk_factors:
210
+ risk_text = "### Risk Factors\n\n"
211
  for factor in result.risk_factors:
212
  risk_text += f"- {factor}\n"
213
 
214
  if result.mitigating_factors:
215
+ risk_text += "\n### Mitigating Factors\n\n"
216
  for factor in result.mitigating_factors:
217
  risk_text += f"- {factor}\n"
218
 
219
  # Format recommendations
220
  rec_text = ""
221
  if result.recommendations:
222
+ rec_text = "### Recommendations\n\n"
223
  for rec in result.recommendations:
224
  rec_text += f"- {rec}\n"
225
 
226
+ # Connection stats
227
+ stats_text = "### Connection Statistics\n\n"
228
+ if connections:
229
+ dst_ips = set(c.get('dst_ip', '') for c in connections)
230
+ dst_ports = set(c.get('dst_port', 0) for c in connections)
231
+ total_sent = sum(c.get('bytes_sent', 0) for c in connections)
232
+ total_recv = sum(c.get('bytes_recv', 0) for c in connections)
233
+
234
+ stats_text += f"| Stat | Value |\n|------|-------|\n"
235
+ stats_text += f"| Unique Destinations | {len(dst_ips)} |\n"
236
+ stats_text += f"| Unique Ports | {len(dst_ports)} |\n"
237
+ stats_text += f"| Total Bytes Sent | {total_sent:,} |\n"
238
+ stats_text += f"| Total Bytes Received | {total_recv:,} |\n"
239
+
240
+ if len(connections) > 1:
241
+ timestamps = sorted(c.get('timestamp', 0) for c in connections)
242
+ intervals = [timestamps[i+1] - timestamps[i] for i in range(len(timestamps)-1)]
243
+ avg_interval = sum(intervals) / len(intervals)
244
+ stats_text += f"| Avg Interval | {avg_interval:.1f}s |\n"
245
+
246
+ return primary, risk_text, rec_text, stats_text
247
 
248
  except json.JSONDecodeError as e:
249
+ return f"Error: Invalid JSON - {str(e)}", "", "", ""
250
  except Exception as e:
251
+ return f"Error: {str(e)}", "", "", ""
252
 
253
 
254
  def load_example(example_name: str) -> str:
 
257
 
258
 
259
  # Build the interface
260
+ with gr.Blocks(title="C2Sentinel", theme=gr.themes.Soft()) as demo:
261
  gr.Markdown("""
262
+ # C2Sentinel
263
 
264
+ **Command and Control Beacon Detection**
265
 
266
+ Analyze network connection patterns to detect C2 beacon activity using behavioral analysis.
267
+ The model identifies C2 communications on any port by analyzing timing patterns, packet sizes, and traffic symmetry.
268
 
269
+ [Model Repository](https://huggingface.co/danielostrow/c2sentinel) | [API Documentation](https://huggingface.co/danielostrow/c2sentinel/blob/main/API_REFERENCE.md) | [neuralintellect.com](https://neuralintellect.com)
270
  """)
271
 
272
+ with gr.Tabs():
273
+ with gr.TabItem("Analyze"):
 
 
 
 
 
 
 
 
 
 
 
 
274
  with gr.Row():
275
+ with gr.Column(scale=1):
276
+ gr.Markdown("### Input")
277
+
278
+ example_dropdown = gr.Dropdown(
279
+ choices=list(EXAMPLES.keys()),
280
+ label="Load Example",
281
+ value=None
282
+ )
283
+
284
+ connection_input = gr.Textbox(
285
+ label="Connection Data (JSON)",
286
+ placeholder='[\n {"timestamp": 1000000, "dst_ip": "10.0.0.1", "dst_port": 443, "bytes_sent": 200, "bytes_recv": 500},\n ...\n]',
287
+ lines=12
288
+ )
289
+
290
+ file_upload = gr.File(
291
+ label="Or Upload Log File (JSON, Zeek conn.log)",
292
+ file_types=[".json", ".log", ".txt"],
293
+ type="binary"
294
+ )
295
+
296
+ gr.Markdown("### Detection Settings")
297
+
298
+ threshold = gr.Slider(
299
+ minimum=0.1,
300
+ maximum=0.9,
301
+ value=0.5,
302
+ step=0.05,
303
+ label="Detection Threshold",
304
+ info="Lower = more sensitive, Higher = fewer false positives"
305
+ )
306
+
307
+ strict_mode = gr.Checkbox(
308
+ label="Strict Mode",
309
+ value=False,
310
+ info="Enforce minimum 0.7 threshold for high-confidence detections only"
311
+ )
312
+
313
+ analyze_btn = gr.Button("Analyze", variant="primary", size="lg")
314
+
315
+ with gr.Column(scale=1):
316
+ gr.Markdown("### Results")
317
+ result_primary = gr.Markdown()
318
+ result_stats = gr.Markdown()
319
+ result_risks = gr.Markdown()
320
+ result_recommendations = gr.Markdown()
321
+
322
+ with gr.TabItem("Whitelist / Blacklist"):
323
+ gr.Markdown("""
324
+ ### Configure Trusted and Blocked Indicators
325
+
326
+ Add IPs and domains to customize detection behavior. Separate multiple entries with commas.
327
+ """)
328
 
329
+ with gr.Row():
330
+ with gr.Column():
331
+ gr.Markdown("#### Whitelist (Trusted)")
332
+ whitelist_ips = gr.Textbox(
333
+ label="Trusted IPs",
334
+ placeholder="8.8.8.8, 1.1.1.1, 192.168.1.0/24",
335
+ lines=2
336
+ )
337
+ whitelist_domains = gr.Textbox(
338
+ label="Trusted Domains",
339
+ placeholder="google.com, microsoft.com, github.com",
340
+ lines=2
341
+ )
342
+
343
+ with gr.Column():
344
+ gr.Markdown("#### Blacklist (Suspicious)")
345
+ blacklist_ips = gr.Textbox(
346
+ label="Blocked IPs",
347
+ placeholder="10.10.10.10, 45.33.32.156",
348
+ lines=2
349
+ )
350
+ blacklist_domains = gr.Textbox(
351
+ label="Blocked Domains",
352
+ placeholder="malware.example.com, c2server.bad",
353
+ lines=2
354
+ )
355
+
356
+ gr.Markdown("""
357
+ **Note:** Whitelist/blacklist settings apply to the current analysis only.
358
+ - Whitelisted IPs will reduce C2 probability
359
+ - Blacklisted IPs will increase C2 probability
360
+ """)
361
+
362
+ with gr.TabItem("Log Format"):
363
+ gr.Markdown("""
364
+ ### Supported Log Formats
365
+
366
+ #### JSON Array
367
+ ```json
368
+ [
369
+ {"timestamp": 1705600000, "dst_ip": "10.0.0.1", "dst_port": 443, "bytes_sent": 200, "bytes_recv": 500},
370
+ {"timestamp": 1705600060, "dst_ip": "10.0.0.1", "dst_port": 443, "bytes_sent": 200, "bytes_recv": 500}
371
+ ]
372
+ ```
373
+
374
+ #### JSON Lines (NDJSON)
375
+ ```
376
+ {"timestamp": 1705600000, "dst_ip": "10.0.0.1", "dst_port": 443, "bytes_sent": 200, "bytes_recv": 500}
377
+ {"timestamp": 1705600060, "dst_ip": "10.0.0.1", "dst_port": 443, "bytes_sent": 200, "bytes_recv": 500}
378
+ ```
379
+
380
+ #### Zeek conn.log Format
381
+ The parser also supports Zeek/Bro conn.log tab-separated format with fields:
382
+ `ts, uid, id.orig_h, id.orig_p, id.resp_h, id.resp_p, proto, service, duration, orig_bytes, resp_bytes, ...`
383
+
384
+ ### Required Fields
385
+
386
+ | Field | Type | Description |
387
+ |-------|------|-------------|
388
+ | `timestamp` | float | Unix timestamp |
389
+ | `dst_ip` | string | Destination IP address |
390
+ | `dst_port` | int | Destination port |
391
+ | `bytes_sent` | int | Bytes sent |
392
+ | `bytes_recv` | int | Bytes received |
393
+ """)
394
 
395
  # Event handlers
396
  example_dropdown.change(
 
401
 
402
  analyze_btn.click(
403
  fn=analyze_connections,
404
+ inputs=[
405
+ connection_input,
406
+ file_upload,
407
+ threshold,
408
+ strict_mode,
409
+ whitelist_ips,
410
+ whitelist_domains,
411
+ blacklist_ips,
412
+ blacklist_domains
413
+ ],
414
+ outputs=[result_primary, result_risks, result_recommendations, result_stats]
415
  )
416
 
417
+ gr.Markdown("""
418
+ ---
419
+ **Author:** Daniel Ostrow | [neuralintellect.com](https://neuralintellect.com) | Built on [LogBERT](https://arxiv.org/abs/2103.04475)
420
+ """)
421
+
422
 
423
if __name__ == "__main__":
    # Start the Gradio server when run as a script (Spaces entry point).
    demo.launch()
requirements.txt CHANGED
@@ -2,3 +2,4 @@ torch
2
  numpy
3
  safetensors
4
  huggingface_hub
 
 
2
  numpy
3
  safetensors
4
  huggingface_hub
5
+ gradio==4.44.1