Spaces:

samarthnaikk
/

amide-models

Sleeping

Samarth Naik committed on Dec 9, 2025

Commit

a7252f1

1 Parent(s): 1c9f2d5

Update /compute endpoint to run all 3 models simultaneously with packet counting

- Removed model_type parameter requirement
- Endpoint now executes all models in a single request (the models are run one after another, not concurrently)
- Single response includes all model outputs clearly separated
- Added total_packets and unique_flows counts
- Updated README.md with new request/response format examples

Files changed (2) hide show

README.md +65 -12
app.py +121 -95

README.md CHANGED Viewed

@@ -69,12 +69,11 @@ Returns available models and their configuration.
 ```
 ### POST `/compute`
-Run breach prediction on network logs.
 **Request:**
 ```json
 {
-  "model_type": "lightGBM",
   "file": [
     {
       "timestamp": "2024-01-01T10:00:00",
@@ -84,7 +83,21 @@ Run breach prediction on network logs.
       "dst_port": 80,
       "packet_size": 1500,
       "seq": 1000,
-      "ack": 2000
     }
   ]
 }
@@ -94,19 +107,59 @@ Run breach prediction on network logs.
 ```json
 {
   "success": true,
-  "output": "Model execution output",
-  "predictions": [
-    {
-      "timestamp": "2024-01-01T10:00:00",
-      "src_ip": "192.168.1.100",
-      "breach_probability": 0.95,
-      "breach_predicted": 1
     }
-  ],
-  "error": null
 }
 ```
 ## Required Input Columns
 - `timestamp`: Timestamp of the network flow

 ```
 ### POST `/compute`
+Run breach prediction on network logs using **all 3 models in a single request** (the models are executed one after another).
 **Request:**
 ```json
 {
   "file": [
     {
       "timestamp": "2024-01-01T10:00:00",
       "dst_port": 80,
       "packet_size": 1500,
       "seq": 1000,
+      "ack": 2000,
+      "tcp_flags": 2,
+      "window": 65535
+    },
+    {
+      "timestamp": "2024-01-01T10:00:01",
+      "src_ip": "192.168.1.101",
+      "dst_ip": "10.0.0.2",
+      "src_port": 12346,
+      "dst_port": 443,
+      "packet_size": 1500,
+      "seq": 1001,
+      "ack": 2001,
+      "tcp_flags": 2,
+      "window": 65535
     }
   ]
 }
 ```json
 {
   "success": true,
+  "packets": {
+    "total": 2,
+    "unique_flows": 2
+  },
+  "models": {
+    "lightGBM": {
+      "success": true,
+      "output": "Model execution output",
+      "predictions": [
+        {
+          "timestamp": "2024-01-01T10:00:00",
+          "src_ip": "192.168.1.100",
+          "breach_probability": 0.95,
+          "breach_predicted": 1
+        }
+      ],
+      "error": null
+    },
+    "autoencoder": {
+      "success": true,
+      "output": "Model execution output",
+      "predictions": [
+        {
+          "timestamp": "2024-01-01T10:00:00",
+          "anomaly_score": 0.87,
+          "is_anomaly": true
+        }
+      ],
+      "error": null
+    },
+    "XGB_lstm": {
+      "success": true,
+      "output": "Model execution output",
+      "predictions": [
+        {
+          "timestamp": "2024-01-01T10:00:00",
+          "breach_risk": 0.92,
+          "prediction": 1
+        }
+      ],
+      "error": null
     }
+  }
 }
 ```
+**Response Format:**
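+- `success`: Overall success status; set to `false` if any model run fails, times out, or raises an error, in which case the response is returned with HTTP status 207 instead of 200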
+- `packets.total`: Total number of packets in the request
+- `packets.unique_flows`: Number of unique network flows (src_ip:src_port → dst_ip:dst_port)
+- `models`: Dictionary of per-model results, keyed by model name
+  - Each model includes: `success` (bool), `output` (stdout), `predictions` (array), `error` (stderr)
 ## Required Input Columns
 - `timestamp`: Timestamp of the network flow

app.py CHANGED Viewed

@@ -44,29 +44,23 @@ def compute():
         if not data:
             return jsonify({"error": "No JSON data provided"}), 400
-        model_type = data.get('model_type')
         file_data = data.get('file')
-        if not model_type or not file_data:
-            return jsonify({"error": "model_type and file are required"}), 400
-        # Validate model type
-        if model_type not in MODEL_CONFIGS:
-            return jsonify({
-                "error": f"Unsupported model type. Available: {list(MODEL_CONFIGS.keys())}"
-            }), 400
         # Validate input data
         is_valid, validation_msg = validate_input_data(file_data)
         if not is_valid:
             return jsonify({"error": f"Invalid input data: {validation_msg}"}), 400
-        model_config = MODEL_CONFIGS[model_type]
-        model_file = model_config['file']
-        # Check if model file exists
-        if not os.path.exists(model_file):
-            return jsonify({"error": f"Model file {model_file} not found"}), 404
         # Create temporary CSV file with unique name
         temp_filename = f"temp_input_{unique_id}.csv"
@@ -78,91 +72,122 @@ def compute():
             writer.writeheader()
             writer.writerows(file_data)
-        try:
-            # Handle different model interfaces
-            if model_config['interface'] == 'argparse':
-                # For XGB_lstm.py which uses --logfile argument
-                cmd = ['python', model_file, '--logfile', temp_filename]
-            else:
-                # For models that expect hardcoded filename, create a symlink
-                expected_filename = "network_logs.csv"
-                backup_filename = None
-                # Backup existing file if it exists
-                if os.path.exists(expected_filename):
-                    backup_filename = f"backup_{expected_filename}_{unique_id}"
-                    os.rename(expected_filename, backup_filename)
-                # Create symlink or copy
-                try:
-                    os.symlink(os.path.abspath(temp_filename), expected_filename)
-                except OSError:
-                    # Fallback to copy if symlink fails
-                    import shutil
-                    shutil.copy2(temp_filename, expected_filename)
-                cmd = ['python', model_file]
-            # Run the model
-            result = subprocess.run(
-                cmd,
-                capture_output=True,
-                text=True,
-                timeout=300,  # 5 minute timeout
-                cwd=os.getcwd()
-            )
-            # Clean up hardcoded file if used
-            if model_config['interface'] == 'hardcoded':
-                if os.path.exists("network_logs.csv"):
-                    os.unlink("network_logs.csv")
-                if backup_filename and os.path.exists(backup_filename):
-                    os.rename(backup_filename, "network_logs.csv")
-            # Clean up temp file
-            if os.path.exists(temp_filename):
-                os.unlink(temp_filename)
-            if result.returncode == 0:
-                # Try to read output file if it exists
-                output_files = {
-                    'lightGBM': 'lightgbm_breach_predictions.csv',
-                    'autoencoder': 'breach_predictions.csv',
-                    'XGB_lstm': 'xgb_lstm_predictions.csv'
                 }
-                output_data = None
-                output_file = output_files.get(model_type)
-                if output_file and os.path.exists(output_file):
                     try:
-                        import pandas as pd
-                        df = pd.read_csv(output_file)
-                        output_data = df.to_dict('records')
-                        # Rename output file to avoid conflicts
-                        os.rename(output_file, f"{unique_id}_{output_file}")
-                    except Exception as e:
-                        print(f"Warning: Could not read output file: {e}")
-                return jsonify({
-                    "success": True,
-                    "model": model_type,
-                    "output": result.stdout,
-                    "predictions": output_data,
-                    "error": result.stderr if result.stderr else None
-                }), 200
-            else:
-                return jsonify({
                     "success": False,
-                    "model": model_type,
-                    "output": result.stdout,
-                    "error": result.stderr
-                }), 500
-        except subprocess.TimeoutExpired:
-            return jsonify({"error": "Model execution timed out after 5 minutes"}), 408
-        except Exception as e:
-            return jsonify({"error": f"Execution error: {str(e)}"}), 500
     except Exception as e:
         return jsonify({"error": f"Server error: {str(e)}"}), 500
@@ -191,7 +216,8 @@ def get_models():
         }
     return jsonify({
         "available_models": models_info,
-        "required_columns": ["timestamp", "src_ip", "dst_ip", "src_port", "dst_port"]
     }), 200
 if __name__ == '__main__':

         if not data:
             return jsonify({"error": "No JSON data provided"}), 400
         file_data = data.get('file')
+        if not file_data:
+            return jsonify({"error": "file is required"}), 400
         # Validate input data
         is_valid, validation_msg = validate_input_data(file_data)
         if not is_valid:
             return jsonify({"error": f"Invalid input data: {validation_msg}"}), 400
+        # Count packets and unique flows
+        num_packets = len(file_data)
+        flows = set()
+        for row in file_data:
+            flow_key = (row['src_ip'], row['src_port'], row['dst_ip'], row['dst_port'])
+            flows.add(flow_key)
+        num_flows = len(flows)
         # Create temporary CSV file with unique name
         temp_filename = f"temp_input_{unique_id}.csv"
             writer.writeheader()
             writer.writerows(file_data)
+        # Run all models
+        results = {
+            "success": True,
+            "packets": {
+                "total": num_packets,
+                "unique_flows": num_flows
+            },
+            "models": {}
+        }
+        for model_type, model_config in MODEL_CONFIGS.items():
+            model_file = model_config['file']
+            # Check if model file exists
+            if not os.path.exists(model_file):
+                results["models"][model_type] = {
+                    "success": False,
+                    "error": f"Model file {model_file} not found"
                 }
+                continue
+            try:
+                # Handle different model interfaces
+                if model_config['interface'] == 'argparse':
+                    # For XGB_lstm.py which uses --logfile argument
+                    cmd = ['python', model_file, '--logfile', temp_filename]
+                else:
+                    # For models that expect hardcoded filename
+                    expected_filename = "network_logs.csv"
+                    backup_filename = None
+                    # Backup existing file if it exists
+                    if os.path.exists(expected_filename):
+                        backup_filename = f"backup_{expected_filename}_{unique_id}"
+                        os.rename(expected_filename, backup_filename)
+                    # Create symlink or copy
                     try:
+                        os.symlink(os.path.abspath(temp_filename), expected_filename)
+                    except OSError:
+                        # Fallback to copy if symlink fails
+                        import shutil
+                        shutil.copy2(temp_filename, expected_filename)
+                    cmd = ['python', model_file]
+                # Run the model
+                result = subprocess.run(
+                    cmd,
+                    capture_output=True,
+                    text=True,
+                    timeout=300,  # 5 minute timeout
+                    cwd=os.getcwd()
+                )
+                # Clean up hardcoded file if used
+                if model_config['interface'] == 'hardcoded':
+                    if os.path.exists("network_logs.csv"):
+                        os.unlink("network_logs.csv")
+                    if backup_filename and os.path.exists(backup_filename):
+                        os.rename(backup_filename, "network_logs.csv")
+                if result.returncode == 0:
+                    # Try to read output file if it exists
+                    output_files = {
+                        'lightGBM': 'lightgbm_breach_predictions.csv',
+                        'autoencoder': 'breach_predictions.csv',
+                        'XGB_lstm': 'xgb_lstm_predictions.csv'
+                    }
+                    output_data = None
+                    output_file = output_files.get(model_type)
+                    if output_file and os.path.exists(output_file):
+                        try:
+                            import pandas as pd
+                            df = pd.read_csv(output_file)
+                            output_data = df.to_dict('records')
+                            # Rename output file to avoid conflicts
+                            os.rename(output_file, f"{unique_id}_{output_file}")
+                        except Exception as e:
+                            print(f"Warning: Could not read output file: {e}")
+                    results["models"][model_type] = {
+                        "success": True,
+                        "output": result.stdout,
+                        "predictions": output_data,
+                        "error": result.stderr if result.stderr else None
+                    }
+                else:
+                    results["models"][model_type] = {
+                        "success": False,
+                        "output": result.stdout,
+                        "error": result.stderr
+                    }
+                    results["success"] = False
+            except subprocess.TimeoutExpired:
+                results["models"][model_type] = {
                     "success": False,
+                    "error": f"Model execution timed out after 5 minutes"
+                }
+                results["success"] = False
+            except Exception as e:
+                results["models"][model_type] = {
+                    "success": False,
+                    "error": f"Execution error: {str(e)}"
+                }
+                results["success"] = False
+        # Clean up temp file
+        if os.path.exists(temp_filename):
+            os.unlink(temp_filename)
+        status_code = 200 if results["success"] else 207  # 207 Multi-Status for partial success
+        return jsonify(results), status_code
     except Exception as e:
         return jsonify({"error": f"Server error: {str(e)}"}), 500
         }
     return jsonify({
         "available_models": models_info,
+        "required_columns": ["timestamp", "src_ip", "dst_ip", "src_port", "dst_port"],
+        "note": "All available models will run automatically. No need to specify model_type."
     }), 200
 if __name__ == '__main__':