Samarth Naik committed
Commit 946c35d · 1 Parent(s): 91e5e21

Simplify to minimal health check API for debugging

Files changed (2):
  1. app.py +1 -188
  2. requirements.txt +1 -10
app.py CHANGED
@@ -1,201 +1,14 @@
-from flask import Flask, request, jsonify
+from flask import Flask, jsonify
 from flask_cors import CORS
-import subprocess
-import json
-import csv
-import os
-import tempfile
-import uuid
-from pathlib import Path
 
 app = Flask(__name__)
 CORS(app)
 
-# Supported model types and their interfaces
-MODEL_CONFIGS = {
-    'lightGBM': {'file': 'lightGBM.py', 'interface': 'hardcoded'},
-    'autoencoder': {'file': 'autoencoder.py', 'interface': 'hardcoded'},
-    'XGB_lstm': {'file': 'XGB_lstm.py', 'interface': 'argparse'}
-}
-
-def validate_input_data(file_data):
-    """Validate the input CSV data structure"""
-    if not isinstance(file_data, list) or len(file_data) == 0:
-        return False, "File data must be a non-empty list"
-
-    # Check if all rows have the same keys
-    first_row_keys = set(file_data[0].keys())
-    for i, row in enumerate(file_data[1:], 1):
-        if set(row.keys()) != first_row_keys:
-            return False, f"Row {i+1} has different columns than the first row"
-
-    # Basic validation for expected network log columns
-    required_columns = {'timestamp', 'src_ip', 'dst_ip', 'src_port', 'dst_port'}
-    if not required_columns.issubset(first_row_keys):
-        return False, f"Missing required columns: {required_columns - first_row_keys}"
-
-    return True, "Valid"
-
-@app.route('/compute', methods=['POST'])
-def compute():
-    temp_filename = None
-    unique_id = str(uuid.uuid4())[:8]
-
-    try:
-        data = request.get_json()
-        if not data:
-            return jsonify({"error": "No JSON data provided"}), 400
-
-        model_type = data.get('model_type')
-        file_data = data.get('file')
-
-        if not model_type or not file_data:
-            return jsonify({"error": "model_type and file are required"}), 400
-
-        # Validate model type
-        if model_type not in MODEL_CONFIGS:
-            return jsonify({
-                "error": f"Unsupported model type. Available: {list(MODEL_CONFIGS.keys())}"
-            }), 400
-
-        # Validate input data
-        is_valid, validation_msg = validate_input_data(file_data)
-        if not is_valid:
-            return jsonify({"error": f"Invalid input data: {validation_msg}"}), 400
-
-        model_config = MODEL_CONFIGS[model_type]
-        model_file = model_config['file']
-
-        # Check if model file exists
-        if not os.path.exists(model_file):
-            return jsonify({"error": f"Model file {model_file} not found"}), 404
-
-        # Create temporary CSV file with unique name
-        temp_filename = f"temp_input_{unique_id}.csv"
-
-        # Convert JSON to CSV
-        fieldnames = file_data[0].keys()
-        with open(temp_filename, 'w', newline='') as temp_file:
-            writer = csv.DictWriter(temp_file, fieldnames=fieldnames)
-            writer.writeheader()
-            writer.writerows(file_data)
-
-        try:
-            # Handle different model interfaces
-            if model_config['interface'] == 'argparse':
-                # For XGB_lstm.py which uses --logfile argument
-                cmd = ['python', model_file, '--logfile', temp_filename]
-            else:
-                # For models that expect hardcoded filename, create a symlink
-                expected_filename = "network_logs.csv"
-                backup_filename = None
-
-                # Backup existing file if it exists
-                if os.path.exists(expected_filename):
-                    backup_filename = f"backup_{expected_filename}_{unique_id}"
-                    os.rename(expected_filename, backup_filename)
-
-                # Create symlink or copy
-                try:
-                    os.symlink(os.path.abspath(temp_filename), expected_filename)
-                except OSError:
-                    # Fallback to copy if symlink fails
-                    import shutil
-                    shutil.copy2(temp_filename, expected_filename)
-
-                cmd = ['python', model_file]
-
-            # Run the model
-            result = subprocess.run(
-                cmd,
-                capture_output=True,
-                text=True,
-                timeout=300,  # 5 minute timeout
-                cwd=os.getcwd()
-            )
-
-            # Clean up hardcoded file if used
-            if model_config['interface'] == 'hardcoded':
-                if os.path.exists("network_logs.csv"):
-                    os.unlink("network_logs.csv")
-                if backup_filename and os.path.exists(backup_filename):
-                    os.rename(backup_filename, "network_logs.csv")
-
-            # Clean up temp file
-            if os.path.exists(temp_filename):
-                os.unlink(temp_filename)
-
-            if result.returncode == 0:
-                # Try to read output file if it exists
-                output_files = {
-                    'lightGBM': 'lightgbm_breach_predictions.csv',
-                    'autoencoder': 'breach_predictions.csv',
-                    'XGB_lstm': 'xgb_lstm_predictions.csv'
-                }
-
-                output_data = None
-                output_file = output_files.get(model_type)
-                if output_file and os.path.exists(output_file):
-                    try:
-                        import pandas as pd
-                        df = pd.read_csv(output_file)
-                        output_data = df.to_dict('records')
-                        # Rename output file to avoid conflicts
-                        os.rename(output_file, f"{unique_id}_{output_file}")
-                    except Exception as e:
-                        print(f"Warning: Could not read output file: {e}")
-
-                return jsonify({
-                    "success": True,
-                    "output": result.stdout,
-                    "predictions": output_data,
-                    "error": result.stderr if result.stderr else None
-                })
-            else:
-                return jsonify({
-                    "success": False,
-                    "output": result.stdout,
-                    "error": result.stderr
-                }), 500
-
-        except subprocess.TimeoutExpired:
-            return jsonify({"error": "Model execution timed out after 5 minutes"}), 408
-
-        except Exception as e:
-            return jsonify({"error": f"Execution error: {str(e)}"}), 500
-
-    except Exception as e:
-        return jsonify({"error": f"Server error: {str(e)}"}), 500
-
-    finally:
-        # Ensure cleanup
-        if temp_filename and os.path.exists(temp_filename):
-            try:
-                os.unlink(temp_filename)
-            except:
-                pass
-
 @app.route('/health', methods=['GET'])
 def health():
     return jsonify({"status": "healthy"})
 
-@app.route('/models', methods=['GET'])
-def get_models():
-    """Return available models and their info"""
-    models_info = {}
-    for model_type, config in MODEL_CONFIGS.items():
-        models_info[model_type] = {
-            "file": config["file"],
-            "available": os.path.exists(config["file"]),
-            "interface": config["interface"]
-        }
-    return jsonify({
-        "available_models": models_info,
-        "required_columns": ["timestamp", "src_ip", "dst_ip", "src_port", "dst_port"]
-    })
-
 if __name__ == '__main__':
-    # Use gunicorn in production, this is just for local development
     import os
     port = int(os.environ.get('PORT', 5000))
     app.run(host='0.0.0.0', port=port, debug=False, threaded=True)
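
For a quick local check that the stripped-down service behaves as intended, Flask's built-in test client can exercise the one remaining route without starting a server. A minimal sketch, assuming the post-commit app.py above sits on the import path (the file name smoke_test.py is hypothetical, not part of this commit):

# smoke_test.py: illustrative sketch, not part of this commit.
# Exercises the only route left after the simplification.
from app import app  # the Flask instance defined in app.py

def test_health():
    client = app.test_client()
    resp = client.get('/health')
    assert resp.status_code == 200
    assert resp.get_json() == {"status": "healthy"}

if __name__ == '__main__':
    test_health()
    print("health check OK")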
requirements.txt CHANGED
@@ -1,12 +1,3 @@
-pandas
-numpy
-scipy
-scikit-learn
-tensorflow
-lightgbm
-xgboost
 flask
 flask-cors
-gunicorn
-python-dotenv
-supabase
+gunicorn
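
The trimmed requirements.txt leaves only three runtime packages (gunicorn is retained for production serving, as the comment removed from app.py noted). A hedged sanity check that an environment still satisfies them; note that the import name for the flask-cors distribution is flask_cors:

# check_deps.py: illustrative sketch, not part of this commit.
import importlib

for module in ("flask", "flask_cors", "gunicorn"):
    importlib.import_module(module)  # raises ImportError if a package is missing
print("all three runtime dependencies import OK")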