Spaces:

samarthnaikk
/

amide-models

Sleeping

File size: 8,712 Bytes

from flask import Flask, jsonify, request
from flask_cors import CORS
import subprocess
import csv
import os
import tempfile
import uuid

# Flask application object; flask-cors is applied to it with its default settings.
app = Flask(__name__)
CORS(app)

# Registry of the model scripts this service can run, keyed by model name.
#   file      -- script path, resolved relative to the working directory
#   interface -- how the script receives its input CSV:
#                'argparse'  -> path passed via the --logfile argument
#                'hardcoded' -> script reads a fixed filename from the cwd
MODEL_CONFIGS = dict(
    lightGBM=dict(file='lightGBM.py', interface='hardcoded'),
    autoencoder=dict(file='autoencoder.py', interface='hardcoded'),
    XGB_lstm=dict(file='XGB_lstm.py', interface='argparse'),
)

def validate_input_data(file_data):
    """Validate the input CSV data structure.

    Args:
        file_data: Parsed JSON payload. Expected to be a non-empty list of
            dicts (one per CSV row) that all share the same set of keys.

    Returns:
        A ``(is_valid, message)`` tuple. ``message`` is ``"Valid"`` on
        success, otherwise a human-readable reason for the rejection.
    """
    if not isinstance(file_data, list) or len(file_data) == 0:
        return False, "File data must be a non-empty list"

    # Single pass: every row must be a dict and must have the same columns as
    # the first row. The type check matters because the payload is untrusted
    # JSON; without it a row such as 1 or "x" raises AttributeError on .keys().
    first_row_keys = None
    for row_number, row in enumerate(file_data, 1):
        if not isinstance(row, dict):
            return False, f"Row {row_number} must be an object mapping column names to values"
        if first_row_keys is None:
            first_row_keys = set(row.keys())
        elif set(row.keys()) != first_row_keys:
            return False, f"Row {row_number} has different columns than the first row"

    # Basic validation for expected network log columns
    required_columns = {'timestamp', 'src_ip', 'dst_ip', 'src_port', 'dst_port'}
    if not required_columns.issubset(first_row_keys):
        return False, f"Missing required columns: {required_columns - first_row_keys}"

    return True, "Valid"

# Filename that the 'hardcoded'-interface model scripts read their input from.
_HARDCODED_INPUT = "network_logs.csv"

# Prediction file each model script is expected to leave behind on success.
_MODEL_OUTPUT_FILES = {
    'lightGBM': 'lightgbm_breach_predictions.csv',
    'autoencoder': 'breach_predictions.csv',
    'XGB_lstm': 'xgb_lstm_predictions.csv'
}


def _read_predictions(model_type, unique_id):
    """Load a model's prediction CSV as a list of row dicts, or None if absent/unreadable."""
    output_file = _MODEL_OUTPUT_FILES.get(model_type)
    if not output_file or not os.path.exists(output_file):
        return None

    predictions = None
    try:
        import pandas as pd
        predictions = pd.read_csv(output_file).to_dict('records')
        # Rename output file to avoid conflicts with the next request
        os.rename(output_file, f"{unique_id}_{output_file}")
    except Exception as e:
        # Best effort: a model run still counts as successful without predictions.
        print(f"Warning: Could not read output file: {e}")
    return predictions


def _run_model(model_type, model_config, input_csv, unique_id):
    """Run one model script against input_csv.

    Returns:
        ``(entry, failed)`` where ``entry`` is the per-model result dict for
        the response and ``failed`` is True when the run must mark the overall
        response as unsuccessful.
    """
    model_file = model_config['file']
    if not os.path.exists(model_file):
        # A missing script is reported per model but does not flip the
        # overall "success" flag.
        return {
            "success": False,
            "error": f"Model file {model_file} not found"
        }, False

    staged = False          # True once we may have created _HARDCODED_INPUT
    backup_filename = None  # set only after an existing file was moved aside
    try:
        try:
            if model_config['interface'] == 'argparse':
                # For XGB_lstm.py which uses --logfile argument
                cmd = ['python', model_file, '--logfile', input_csv]
            else:
                # For models that expect a hardcoded filename: move any
                # existing file aside, then expose our input under that name.
                # lexists (not exists) so a dangling symlink is also detected.
                if os.path.lexists(_HARDCODED_INPUT):
                    candidate = f"backup_{_HARDCODED_INPUT}_{unique_id}"
                    os.rename(_HARDCODED_INPUT, candidate)
                    backup_filename = candidate
                staged = True
                try:
                    os.symlink(os.path.abspath(input_csv), _HARDCODED_INPUT)
                except OSError:
                    # Fallback to copy if symlink fails
                    import shutil
                    shutil.copy2(input_csv, _HARDCODED_INPUT)
                cmd = ['python', model_file]

            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=300,  # 5 minute timeout
                cwd=os.getcwd()
            )
        finally:
            # Always undo the staging, including on timeout or any other
            # error, so a failed run cannot leave a stale input file behind
            # or lose the file that was backed up.
            if staged:
                if os.path.lexists(_HARDCODED_INPUT):
                    os.unlink(_HARDCODED_INPUT)
                if backup_filename and os.path.lexists(backup_filename):
                    os.rename(backup_filename, _HARDCODED_INPUT)

        if result.returncode != 0:
            return {
                "success": False,
                "output": result.stdout,
                "error": result.stderr
            }, True

        return {
            "success": True,
            "output": result.stdout,
            "predictions": _read_predictions(model_type, unique_id),
            "error": result.stderr if result.stderr else None
        }, False

    except subprocess.TimeoutExpired:
        return {
            "success": False,
            "error": "Model execution timed out after 5 minutes"
        }, True

    except Exception as e:
        return {
            "success": False,
            "error": f"Execution error: {str(e)}"
        }, True


@app.route('/compute', methods=['POST'])
def compute():
    """Run every configured model on the rows posted as JSON.

    Expects a JSON object whose ``file`` key holds a non-empty list of row
    dicts. The rows are written to a temporary CSV in the working directory,
    each model in MODEL_CONFIGS is executed on it as a subprocess, and the
    per-model results are returned together with packet/flow counts.

    Returns:
        400 with an ``error`` message for a missing or invalid payload,
        200 when no model run failed, 207 when at least one model run failed,
        500 with an ``error`` message on an unexpected server error.
    """
    temp_filename = None
    unique_id = str(uuid.uuid4())[:8]

    try:
        # silent=True: a malformed or non-JSON body yields None (-> 400 below)
        # instead of an HTTPException that the blanket handler would turn
        # into a 500.
        data = request.get_json(silent=True)
        if not data:
            return jsonify({"error": "No JSON data provided"}), 400
        if not isinstance(data, dict):
            return jsonify({"error": "JSON body must be an object"}), 400

        file_data = data.get('file')
        if not file_data:
            return jsonify({"error": "file is required"}), 400

        # Validate input data
        is_valid, validation_msg = validate_input_data(file_data)
        if not is_valid:
            return jsonify({"error": f"Invalid input data: {validation_msg}"}), 400

        # Count packets and unique flows (a flow is a distinct src/dst ip+port tuple)
        num_packets = len(file_data)
        num_flows = len({
            (row['src_ip'], row['src_port'], row['dst_ip'], row['dst_port'])
            for row in file_data
        })

        # Convert JSON to a CSV file with a per-request unique name
        temp_filename = f"temp_input_{unique_id}.csv"
        with open(temp_filename, 'w', newline='') as temp_file:
            writer = csv.DictWriter(temp_file, fieldnames=file_data[0].keys())
            writer.writeheader()
            writer.writerows(file_data)

        # Run all models
        results = {
            "success": True,
            "packets": {
                "total": num_packets,
                "unique_flows": num_flows
            },
            "models": {}
        }
        for model_type, model_config in MODEL_CONFIGS.items():
            entry, failed = _run_model(model_type, model_config, temp_filename, unique_id)
            results["models"][model_type] = entry
            if failed:
                results["success"] = False

        status_code = 200 if results["success"] else 207  # 207 Multi-Status for partial success
        return jsonify(results), status_code

    except Exception as e:
        return jsonify({"error": f"Server error: {str(e)}"}), 500

    finally:
        # Remove the temporary CSV on every exit path; failure to delete it
        # must not mask the response.
        if temp_filename and os.path.exists(temp_filename):
            try:
                os.unlink(temp_filename)
            except OSError:
                pass

@app.route('/health', methods=['GET'])
def health():
    """Liveness probe: always answers with a fixed "healthy" status payload."""
    status_payload = {"status": "healthy"}
    return jsonify(status_payload)

@app.route('/models', methods=['GET'])
def get_models():
    """Describe each configured model and the columns the input must contain.

    For every entry in MODEL_CONFIGS the response lists its script file,
    whether that file currently exists on disk, and its interface type.
    """
    available_models = {
        name: {
            "file": cfg["file"],
            "available": os.path.exists(cfg["file"]),
            "interface": cfg["interface"]
        }
        for name, cfg in MODEL_CONFIGS.items()
    }
    payload = {
        "available_models": available_models,
        "required_columns": ["timestamp", "src_ip", "dst_ip", "src_port", "dst_port"],
        "note": "All available models will run automatically. No need to specify model_type."
    }
    return jsonify(payload), 200

if __name__ == '__main__':
    # Script entry point: listen on all interfaces, on the port given by the
    # PORT environment variable (7860 when unset). `os` is already imported at
    # the top of the file.
    port = int(os.getenv('PORT', 7860))
    app.run(host='0.0.0.0', port=port, debug=False, threaded=True)