danielostrow commited on
Commit
2e467f6
·
verified ·
1 Parent(s): 4ddf4fa

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +290 -87
  3. requirements.txt +1 -0
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🛡️
4
  colorFrom: red
5
  colorTo: gray
6
  sdk: gradio
7
- sdk_version: 5.0.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
 
4
  colorFrom: red
5
  colorTo: gray
6
  sdk: gradio
7
+ sdk_version: 4.44.1
8
  app_file: app.py
9
  pinned: false
10
  license: mit
app.py CHANGED
@@ -7,19 +7,13 @@ Interactive demo for testing C2 beacon detection.
7
  import gradio as gr
8
  import json
9
  from huggingface_hub import hf_hub_download
10
- import sys
11
- import os
12
 
13
- # Download model files from the model repo
14
  model_dir = "."
15
- try:
16
- hf_hub_download(repo_id="danielostrow/c2sentinel", filename="c2sentinel.py", local_dir=model_dir)
17
- hf_hub_download(repo_id="danielostrow/c2sentinel", filename="c2_sentinel.safetensors", local_dir=model_dir)
18
- hf_hub_download(repo_id="danielostrow/c2sentinel", filename="c2_sentinel.json", local_dir=model_dir)
19
- except Exception as e:
20
- print(f"Error downloading model files: {e}")
21
-
22
- # Import the model
23
  from c2sentinel import C2Sentinel
24
 
25
  # Load model
@@ -70,69 +64,191 @@ EXAMPLES = {
70
  {"timestamp": 1705600900, "dst_ip": "203.0.113.50", "dst_port": 8080, "bytes_sent": 256, "bytes_recv": 512},
71
  {"timestamp": 1705601200, "dst_ip": "203.0.113.50", "dst_port": 8080, "bytes_sent": 256, "bytes_recv": 512},
72
  ], indent=2),
 
 
 
 
 
 
 
 
 
73
  }
74
 
75
 
76
- def analyze_connections(connection_json: str, threshold: float, strict_mode: bool) -> tuple:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  """Analyze connection data and return results."""
78
  try:
79
- connections = json.loads(connection_json)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  if not isinstance(connections, list):
81
- return "Error: Input must be a JSON array of connection objects", "", ""
82
 
83
  if len(connections) < 3:
84
- return "Error: Need at least 3 connections for analysis", "", ""
85
 
86
  # Run analysis
87
  result = sentinel.analyze(connections, threshold=threshold, strict_mode=strict_mode)
88
 
89
  # Format primary result
90
  if result.is_c2:
91
- verdict = f"C2 DETECTED: {result.c2_type}"
92
- verdict_color = "red"
93
  else:
94
- verdict = "No C2 Detected"
95
- verdict_color = "green"
96
 
97
- primary = f"""## Verdict: {verdict}
98
 
99
- **Probability:** {result.c2_probability:.1%}
100
- **Confidence:** {result.confidence:.1%}
101
- **Detection Method:** {result.detection_method}
 
 
 
102
  """
103
 
104
  if result.matched_legitimate_pattern:
105
- primary += f"**Matched Pattern:** {result.matched_legitimate_pattern}\n"
106
  if result.service_type:
107
- primary += f"**Service Type:** {result.service_type}\n"
108
  if result.immediate_detection:
109
- primary += "**Immediate Detection:** Yes (signature match)\n"
110
 
111
  # Format risk factors
112
  risk_text = ""
113
  if result.risk_factors:
114
- risk_text = "### Risk Factors\n"
115
  for factor in result.risk_factors:
116
  risk_text += f"- {factor}\n"
117
 
118
  if result.mitigating_factors:
119
- risk_text += "\n### Mitigating Factors\n"
120
  for factor in result.mitigating_factors:
121
  risk_text += f"- {factor}\n"
122
 
123
  # Format recommendations
124
  rec_text = ""
125
  if result.recommendations:
126
- rec_text = "### Recommendations\n"
127
  for rec in result.recommendations:
128
  rec_text += f"- {rec}\n"
129
 
130
- return primary, risk_text, rec_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
  except json.JSONDecodeError as e:
133
- return f"Error: Invalid JSON - {str(e)}", "", ""
134
  except Exception as e:
135
- return f"Error: {str(e)}", "", ""
136
 
137
 
138
  def load_example(example_name: str) -> str:
@@ -141,67 +257,140 @@ def load_example(example_name: str) -> str:
141
 
142
 
143
  # Build the interface
144
- with gr.Blocks(title="C2Sentinel Demo", theme=gr.themes.Soft()) as demo:
145
  gr.Markdown("""
146
- # C2Sentinel
147
 
148
- **Command and Control Beacon Detection**
149
 
150
- Analyze network connection patterns to detect C2 beacon activity.
151
- The model uses behavioral analysis to identify C2 communications on any port.
152
 
153
- [Model Repository](https://huggingface.co/danielostrow/c2sentinel) | [Documentation](https://huggingface.co/danielostrow/c2sentinel/blob/main/API_REFERENCE.md)
154
  """)
155
 
156
- with gr.Row():
157
- with gr.Column(scale=2):
158
- example_dropdown = gr.Dropdown(
159
- choices=list(EXAMPLES.keys()),
160
- label="Load Example",
161
- value=None
162
- )
163
-
164
- connection_input = gr.Textbox(
165
- label="Connection Data (JSON)",
166
- placeholder='[\n {"timestamp": 1000000, "dst_ip": "10.0.0.1", "dst_port": 443, "bytes_sent": 200, "bytes_recv": 500},\n ...\n]',
167
- lines=15
168
- )
169
-
170
  with gr.Row():
171
- threshold = gr.Slider(
172
- minimum=0.1,
173
- maximum=0.9,
174
- value=0.5,
175
- step=0.1,
176
- label="Detection Threshold"
177
- )
178
- strict_mode = gr.Checkbox(
179
- label="Strict Mode (min 0.7 threshold)",
180
- value=False
181
- )
182
-
183
- analyze_btn = gr.Button("Analyze", variant="primary")
184
-
185
- with gr.Column(scale=2):
186
- result_primary = gr.Markdown(label="Analysis Result")
187
- result_risks = gr.Markdown(label="Risk Analysis")
188
- result_recommendations = gr.Markdown(label="Recommendations")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
 
190
- gr.Markdown("""
191
- ---
192
- ### Connection Record Format
193
-
194
- | Field | Type | Required | Description |
195
- |-------|------|----------|-------------|
196
- | `timestamp` | float | Yes | Unix timestamp |
197
- | `dst_ip` | str | Yes | Destination IP |
198
- | `dst_port` | int | Yes | Destination port |
199
- | `bytes_sent` | int | Yes | Bytes sent |
200
- | `bytes_recv` | int | Yes | Bytes received |
201
-
202
- ---
203
- **Author:** Daniel Ostrow | [neuralintellect.com](https://neuralintellect.com)
204
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
  # Event handlers
207
  example_dropdown.change(
@@ -212,10 +401,24 @@ with gr.Blocks(title="C2Sentinel Demo", theme=gr.themes.Soft()) as demo:
212
 
213
  analyze_btn.click(
214
  fn=analyze_connections,
215
- inputs=[connection_input, threshold, strict_mode],
216
- outputs=[result_primary, result_risks, result_recommendations]
 
 
 
 
 
 
 
 
 
217
  )
218
 
 
 
 
 
 
219
 
220
  if __name__ == "__main__":
221
  demo.launch()
 
import gradio as gr
import json
from huggingface_hub import hf_hub_download

# Download model files
# Fetch the model code and weights from the model repo at startup so the
# Space always runs the latest published version.
# NOTE(review): failures here are unhandled and will crash the Space at
# import time — confirm a loud failure is the intended behavior.
model_dir = "."
hf_hub_download(repo_id="danielostrow/c2sentinel", filename="c2sentinel.py", local_dir=model_dir)
hf_hub_download(repo_id="danielostrow/c2sentinel", filename="c2_sentinel.safetensors", local_dir=model_dir)
hf_hub_download(repo_id="danielostrow/c2sentinel", filename="c2_sentinel.json", local_dir=model_dir)

# Import the model class from the just-downloaded c2sentinel.py;
# this import must come after the download calls above.
from c2sentinel import C2Sentinel
18
 
19
  # Load model
 
64
  {"timestamp": 1705600900, "dst_ip": "203.0.113.50", "dst_port": 8080, "bytes_sent": 256, "bytes_recv": 512},
65
  {"timestamp": 1705601200, "dst_ip": "203.0.113.50", "dst_port": 8080, "bytes_sent": 256, "bytes_recv": 512},
66
  ], indent=2),
67
+
68
+ "DNS Tunnel C2": json.dumps([
69
+ {"timestamp": 1705600000, "dst_ip": "198.51.100.53", "dst_port": 53, "bytes_sent": 64, "bytes_recv": 512},
70
+ {"timestamp": 1705600005, "dst_ip": "198.51.100.53", "dst_port": 53, "bytes_sent": 64, "bytes_recv": 512},
71
+ {"timestamp": 1705600010, "dst_ip": "198.51.100.53", "dst_port": 53, "bytes_sent": 64, "bytes_recv": 512},
72
+ {"timestamp": 1705600015, "dst_ip": "198.51.100.53", "dst_port": 53, "bytes_sent": 64, "bytes_recv": 512},
73
+ {"timestamp": 1705600020, "dst_ip": "198.51.100.53", "dst_port": 53, "bytes_sent": 64, "bytes_recv": 512},
74
+ {"timestamp": 1705600025, "dst_ip": "198.51.100.53", "dst_port": 53, "bytes_sent": 64, "bytes_recv": 512},
75
+ ], indent=2),
76
  }
77
 
78
 
79
+ def parse_log_file(file_content: str) -> list:
80
+ """Parse various log file formats into connection records."""
81
+ connections = []
82
+ lines = file_content.strip().split('\n')
83
+
84
+ for line in lines:
85
+ line = line.strip()
86
+ if not line or line.startswith('#'):
87
+ continue
88
+
89
+ # Try JSON format
90
+ try:
91
+ record = json.loads(line)
92
+ if 'dst_ip' in record or 'id.resp_h' in record:
93
+ conn = {
94
+ 'timestamp': record.get('timestamp', record.get('ts', 0)),
95
+ 'dst_ip': record.get('dst_ip', record.get('id.resp_h', '')),
96
+ 'dst_port': int(record.get('dst_port', record.get('id.resp_p', 0))),
97
+ 'bytes_sent': int(record.get('bytes_sent', record.get('orig_bytes', 0) or 0)),
98
+ 'bytes_recv': int(record.get('bytes_recv', record.get('resp_bytes', 0) or 0)),
99
+ }
100
+ if conn['dst_ip']:
101
+ connections.append(conn)
102
+ continue
103
+ except (json.JSONDecodeError, ValueError):
104
+ pass
105
+
106
+ # Try Zeek tab-separated format
107
+ parts = line.split('\t')
108
+ if len(parts) >= 10:
109
+ try:
110
+ conn = {
111
+ 'timestamp': float(parts[0]),
112
+ 'dst_ip': parts[4],
113
+ 'dst_port': int(parts[5]),
114
+ 'bytes_sent': int(parts[9] if parts[9] != '-' else 0),
115
+ 'bytes_recv': int(parts[10] if len(parts) > 10 and parts[10] != '-' else 0),
116
+ }
117
+ connections.append(conn)
118
+ continue
119
+ except (ValueError, IndexError):
120
+ pass
121
+
122
+ return connections
123
+
124
+
125
+ def analyze_connections(
126
+ connection_json: str,
127
+ uploaded_file,
128
+ threshold: float,
129
+ strict_mode: bool,
130
+ whitelist_ips: str,
131
+ whitelist_domains: str,
132
+ blacklist_ips: str,
133
+ blacklist_domains: str
134
+ ) -> tuple:
135
  """Analyze connection data and return results."""
136
  try:
137
+ # Reset whitelist/blacklist for this analysis
138
+ sentinel.whitelist_ips = set()
139
+ sentinel.whitelist_domains = set()
140
+ sentinel.blacklist_ips = set()
141
+ sentinel.blacklist_domains = set()
142
+
143
+ # Apply whitelist
144
+ if whitelist_ips.strip():
145
+ ips = [ip.strip() for ip in whitelist_ips.split(',') if ip.strip()]
146
+ sentinel.add_whitelist(ips=ips)
147
+ if whitelist_domains.strip():
148
+ domains = [d.strip() for d in whitelist_domains.split(',') if d.strip()]
149
+ sentinel.add_whitelist(domains=domains)
150
+
151
+ # Apply blacklist
152
+ if blacklist_ips.strip():
153
+ ips = [ip.strip() for ip in blacklist_ips.split(',') if ip.strip()]
154
+ sentinel.add_blacklist(ips=ips)
155
+ if blacklist_domains.strip():
156
+ domains = [d.strip() for d in blacklist_domains.split(',') if d.strip()]
157
+ sentinel.add_blacklist(domains=domains)
158
+
159
+ # Get connections from file upload or text input
160
+ connections = []
161
+
162
+ if uploaded_file is not None:
163
+ file_content = uploaded_file.decode('utf-8') if isinstance(uploaded_file, bytes) else open(uploaded_file, 'r').read()
164
+ connections = parse_log_file(file_content)
165
+ if not connections:
166
+ # Try as JSON array
167
+ try:
168
+ connections = json.loads(file_content)
169
+ except:
170
+ pass
171
+
172
+ if not connections and connection_json.strip():
173
+ connections = json.loads(connection_json)
174
+
175
  if not isinstance(connections, list):
176
+ return "Error: Input must be a JSON array of connection objects", "", "", ""
177
 
178
  if len(connections) < 3:
179
+ return "Error: Need at least 3 connections for analysis", "", "", ""
180
 
181
  # Run analysis
182
  result = sentinel.analyze(connections, threshold=threshold, strict_mode=strict_mode)
183
 
184
  # Format primary result
185
  if result.is_c2:
186
+ verdict = f"**C2 DETECTED:** {result.c2_type}"
 
187
  else:
188
+ verdict = "**No C2 Detected**"
 
189
 
190
+ primary = f"""## {verdict}
191
 
192
+ | Metric | Value |
193
+ |--------|-------|
194
+ | Probability | {result.c2_probability:.1%} |
195
+ | Confidence | {result.confidence:.1%} |
196
+ | Detection Method | {result.detection_method} |
197
+ | Connections Analyzed | {len(connections)} |
198
  """
199
 
200
  if result.matched_legitimate_pattern:
201
+ primary += f"| Matched Pattern | {result.matched_legitimate_pattern} |\n"
202
  if result.service_type:
203
+ primary += f"| Service Type | {result.service_type} |\n"
204
  if result.immediate_detection:
205
+ primary += "| Immediate Detection | Yes (signature match) |\n"
206
 
207
  # Format risk factors
208
  risk_text = ""
209
  if result.risk_factors:
210
+ risk_text = "### Risk Factors\n\n"
211
  for factor in result.risk_factors:
212
  risk_text += f"- {factor}\n"
213
 
214
  if result.mitigating_factors:
215
+ risk_text += "\n### Mitigating Factors\n\n"
216
  for factor in result.mitigating_factors:
217
  risk_text += f"- {factor}\n"
218
 
219
  # Format recommendations
220
  rec_text = ""
221
  if result.recommendations:
222
+ rec_text = "### Recommendations\n\n"
223
  for rec in result.recommendations:
224
  rec_text += f"- {rec}\n"
225
 
226
+ # Connection stats
227
+ stats_text = "### Connection Statistics\n\n"
228
+ if connections:
229
+ dst_ips = set(c.get('dst_ip', '') for c in connections)
230
+ dst_ports = set(c.get('dst_port', 0) for c in connections)
231
+ total_sent = sum(c.get('bytes_sent', 0) for c in connections)
232
+ total_recv = sum(c.get('bytes_recv', 0) for c in connections)
233
+
234
+ stats_text += f"| Stat | Value |\n|------|-------|\n"
235
+ stats_text += f"| Unique Destinations | {len(dst_ips)} |\n"
236
+ stats_text += f"| Unique Ports | {len(dst_ports)} |\n"
237
+ stats_text += f"| Total Bytes Sent | {total_sent:,} |\n"
238
+ stats_text += f"| Total Bytes Received | {total_recv:,} |\n"
239
+
240
+ if len(connections) > 1:
241
+ timestamps = sorted(c.get('timestamp', 0) for c in connections)
242
+ intervals = [timestamps[i+1] - timestamps[i] for i in range(len(timestamps)-1)]
243
+ avg_interval = sum(intervals) / len(intervals)
244
+ stats_text += f"| Avg Interval | {avg_interval:.1f}s |\n"
245
+
246
+ return primary, risk_text, rec_text, stats_text
247
 
248
  except json.JSONDecodeError as e:
249
+ return f"Error: Invalid JSON - {str(e)}", "", "", ""
250
  except Exception as e:
251
+ return f"Error: {str(e)}", "", "", ""
252
 
253
 
254
  def load_example(example_name: str) -> str:
 
257
 
258
 
259
  # Build the interface
260
+ with gr.Blocks(title="C2Sentinel", theme=gr.themes.Soft()) as demo:
261
  gr.Markdown("""
262
+ # C2Sentinel
263
 
264
+ **Command and Control Beacon Detection**
265
 
266
+ Analyze network connection patterns to detect C2 beacon activity using behavioral analysis.
267
+ The model identifies C2 communications on any port by analyzing timing patterns, packet sizes, and traffic symmetry.
268
 
269
+ [Model Repository](https://huggingface.co/danielostrow/c2sentinel) | [API Documentation](https://huggingface.co/danielostrow/c2sentinel/blob/main/API_REFERENCE.md) | [neuralintellect.com](https://neuralintellect.com)
270
  """)
271
 
272
+ with gr.Tabs():
273
+ with gr.TabItem("Analyze"):
 
 
 
 
 
 
 
 
 
 
 
 
274
  with gr.Row():
275
+ with gr.Column(scale=1):
276
+ gr.Markdown("### Input")
277
+
278
+ example_dropdown = gr.Dropdown(
279
+ choices=list(EXAMPLES.keys()),
280
+ label="Load Example",
281
+ value=None
282
+ )
283
+
284
+ connection_input = gr.Textbox(
285
+ label="Connection Data (JSON)",
286
+ placeholder='[\n {"timestamp": 1000000, "dst_ip": "10.0.0.1", "dst_port": 443, "bytes_sent": 200, "bytes_recv": 500},\n ...\n]',
287
+ lines=12
288
+ )
289
+
290
+ file_upload = gr.File(
291
+ label="Or Upload Log File (JSON, Zeek conn.log)",
292
+ file_types=[".json", ".log", ".txt"],
293
+ type="binary"
294
+ )
295
+
296
+ gr.Markdown("### Detection Settings")
297
+
298
+ threshold = gr.Slider(
299
+ minimum=0.1,
300
+ maximum=0.9,
301
+ value=0.5,
302
+ step=0.05,
303
+ label="Detection Threshold",
304
+ info="Lower = more sensitive, Higher = fewer false positives"
305
+ )
306
+
307
+ strict_mode = gr.Checkbox(
308
+ label="Strict Mode",
309
+ value=False,
310
+ info="Enforce minimum 0.7 threshold for high-confidence detections only"
311
+ )
312
+
313
+ analyze_btn = gr.Button("Analyze", variant="primary", size="lg")
314
+
315
+ with gr.Column(scale=1):
316
+ gr.Markdown("### Results")
317
+ result_primary = gr.Markdown()
318
+ result_stats = gr.Markdown()
319
+ result_risks = gr.Markdown()
320
+ result_recommendations = gr.Markdown()
321
+
322
+ with gr.TabItem("Whitelist / Blacklist"):
323
+ gr.Markdown("""
324
+ ### Configure Trusted and Blocked Indicators
325
+
326
+ Add IPs and domains to customize detection behavior. Separate multiple entries with commas.
327
+ """)
328
 
329
+ with gr.Row():
330
+ with gr.Column():
331
+ gr.Markdown("#### Whitelist (Trusted)")
332
+ whitelist_ips = gr.Textbox(
333
+ label="Trusted IPs",
334
+ placeholder="8.8.8.8, 1.1.1.1, 192.168.1.0/24",
335
+ lines=2
336
+ )
337
+ whitelist_domains = gr.Textbox(
338
+ label="Trusted Domains",
339
+ placeholder="google.com, microsoft.com, github.com",
340
+ lines=2
341
+ )
342
+
343
+ with gr.Column():
344
+ gr.Markdown("#### Blacklist (Suspicious)")
345
+ blacklist_ips = gr.Textbox(
346
+ label="Blocked IPs",
347
+ placeholder="10.10.10.10, 45.33.32.156",
348
+ lines=2
349
+ )
350
+ blacklist_domains = gr.Textbox(
351
+ label="Blocked Domains",
352
+ placeholder="malware.example.com, c2server.bad",
353
+ lines=2
354
+ )
355
+
356
+ gr.Markdown("""
357
+ **Note:** Whitelist/blacklist settings apply to the current analysis only.
358
+ - Whitelisted IPs will reduce C2 probability
359
+ - Blacklisted IPs will increase C2 probability
360
+ """)
361
+
362
+ with gr.TabItem("Log Format"):
363
+ gr.Markdown("""
364
+ ### Supported Log Formats
365
+
366
+ #### JSON Array
367
+ ```json
368
+ [
369
+ {"timestamp": 1705600000, "dst_ip": "10.0.0.1", "dst_port": 443, "bytes_sent": 200, "bytes_recv": 500},
370
+ {"timestamp": 1705600060, "dst_ip": "10.0.0.1", "dst_port": 443, "bytes_sent": 200, "bytes_recv": 500}
371
+ ]
372
+ ```
373
+
374
+ #### JSON Lines (NDJSON)
375
+ ```
376
+ {"timestamp": 1705600000, "dst_ip": "10.0.0.1", "dst_port": 443, "bytes_sent": 200, "bytes_recv": 500}
377
+ {"timestamp": 1705600060, "dst_ip": "10.0.0.1", "dst_port": 443, "bytes_sent": 200, "bytes_recv": 500}
378
+ ```
379
+
380
+ #### Zeek conn.log Format
381
+ The parser also supports Zeek/Bro conn.log tab-separated format with fields:
382
+ `ts, uid, id.orig_h, id.orig_p, id.resp_h, id.resp_p, proto, service, duration, orig_bytes, resp_bytes, ...`
383
+
384
+ ### Required Fields
385
+
386
+ | Field | Type | Description |
387
+ |-------|------|-------------|
388
+ | `timestamp` | float | Unix timestamp |
389
+ | `dst_ip` | string | Destination IP address |
390
+ | `dst_port` | int | Destination port |
391
+ | `bytes_sent` | int | Bytes sent |
392
+ | `bytes_recv` | int | Bytes received |
393
+ """)
394
 
395
  # Event handlers
396
  example_dropdown.change(
 
401
 
402
  analyze_btn.click(
403
  fn=analyze_connections,
404
+ inputs=[
405
+ connection_input,
406
+ file_upload,
407
+ threshold,
408
+ strict_mode,
409
+ whitelist_ips,
410
+ whitelist_domains,
411
+ blacklist_ips,
412
+ blacklist_domains
413
+ ],
414
+ outputs=[result_primary, result_risks, result_recommendations, result_stats]
415
  )
416
 
417
+ gr.Markdown("""
418
+ ---
419
+ **Author:** Daniel Ostrow | [neuralintellect.com](https://neuralintellect.com) | Built on [LogBERT](https://arxiv.org/abs/2103.04475)
420
+ """)
421
+
422
 
423
if __name__ == "__main__":
    # Start the Gradio server when run as a script (Spaces entry point).
    demo.launch()
requirements.txt CHANGED
@@ -2,3 +2,4 @@ torch
2
  numpy
3
  safetensors
4
  huggingface_hub
 
 
2
  numpy
3
  safetensors
4
  huggingface_hub
5
+ gradio==4.44.1