Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
#
|
| 2 |
from flask import Flask, request, jsonify
|
| 3 |
from flask_cors import CORS
|
| 4 |
import pandas as pd
|
|
@@ -81,14 +81,18 @@ def initialize():
|
|
| 81 |
global FEATURE_COLUMNS
|
| 82 |
try:
|
| 83 |
# Load models
|
|
|
|
| 84 |
rf_model = joblib.load('phishing_detector_rf_grega.pkl')
|
| 85 |
xgb_model = joblib.load('phishing_detector_xgb_grega.pkl')
|
| 86 |
scaler = joblib.load('feature_scaler_grega.pkl')
|
|
|
|
| 87 |
|
| 88 |
# Load feature columns
|
|
|
|
| 89 |
url = "https://raw.githubusercontent.com/GregaVrbancic/Phishing-Dataset/master/dataset_small.csv"
|
| 90 |
df = pd.read_csv(url)
|
| 91 |
FEATURE_COLUMNS = df.drop('phishing', axis=1).columns.tolist()
|
|
|
|
| 92 |
|
| 93 |
return rf_model, xgb_model, scaler
|
| 94 |
except Exception as e:
|
|
@@ -100,7 +104,7 @@ print("Initializing models...")
|
|
| 100 |
rf_model, xgb_model, scaler = initialize()
|
| 101 |
if not rf_model:
|
| 102 |
print("Failed to initialize models!")
|
| 103 |
-
|
| 104 |
|
| 105 |
# Fast DNS lookup with caching
|
| 106 |
@lru_cache(maxsize=5000)
|
|
@@ -391,8 +395,12 @@ def predict_phishing_fast(url):
|
|
| 391 |
}
|
| 392 |
|
| 393 |
# API endpoint for checking URLs
|
| 394 |
-
@app.route('/api/check', methods=['POST'])
|
| 395 |
def check_url():
|
|
|
|
|
|
|
|
|
|
|
|
|
| 396 |
try:
|
| 397 |
# Get JSON data from request
|
| 398 |
data = request.get_json()
|
|
@@ -434,24 +442,41 @@ def health_check():
|
|
| 434 |
return jsonify({
|
| 435 |
'status': 'healthy',
|
| 436 |
'message': 'Phishing detection API is running',
|
| 437 |
-
'models_loaded': rf_model is not None and xgb_model is not None and scaler is not None
|
|
|
|
| 438 |
})
|
| 439 |
|
| 440 |
-
# Root endpoint
|
| 441 |
@app.route('/', methods=['GET'])
|
| 442 |
def root():
|
| 443 |
return jsonify({
|
| 444 |
'message': 'Phishing Detection API',
|
| 445 |
'version': '1.0.0',
|
|
|
|
| 446 |
'endpoints': {
|
| 447 |
'check': '/api/check (POST)',
|
| 448 |
'health': '/health (GET)'
|
| 449 |
},
|
| 450 |
'usage': {
|
| 451 |
'check': 'POST {"url": "https://example.com"} to /api/check'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 452 |
}
|
| 453 |
})
|
| 454 |
|
| 455 |
# Run the app
|
| 456 |
if __name__ == '__main__':
|
| 457 |
-
|
|
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
from flask import Flask, request, jsonify
|
| 3 |
from flask_cors import CORS
|
| 4 |
import pandas as pd
|
|
|
|
| 81 |
global FEATURE_COLUMNS
|
| 82 |
try:
|
| 83 |
# Load models
|
| 84 |
+
print("Loading models...")
|
| 85 |
rf_model = joblib.load('phishing_detector_rf_grega.pkl')
|
| 86 |
xgb_model = joblib.load('phishing_detector_xgb_grega.pkl')
|
| 87 |
scaler = joblib.load('feature_scaler_grega.pkl')
|
| 88 |
+
print("Models loaded successfully!")
|
| 89 |
|
| 90 |
# Load feature columns
|
| 91 |
+
print("Loading feature columns...")
|
| 92 |
url = "https://raw.githubusercontent.com/GregaVrbancic/Phishing-Dataset/master/dataset_small.csv"
|
| 93 |
df = pd.read_csv(url)
|
| 94 |
FEATURE_COLUMNS = df.drop('phishing', axis=1).columns.tolist()
|
| 95 |
+
print(f"Feature columns loaded: {len(FEATURE_COLUMNS)} features")
|
| 96 |
|
| 97 |
return rf_model, xgb_model, scaler
|
| 98 |
except Exception as e:
|
|
|
|
| 104 |
rf_model, xgb_model, scaler = initialize()
|
| 105 |
if not rf_model:
|
| 106 |
print("Failed to initialize models!")
|
| 107 |
+
# We'll continue and return error messages in the API
|
| 108 |
|
| 109 |
# Fast DNS lookup with caching
|
| 110 |
@lru_cache(maxsize=5000)
|
|
|
|
| 395 |
}
|
| 396 |
|
| 397 |
# API endpoint for checking URLs
|
| 398 |
+
@app.route('/api/check', methods=['POST', 'OPTIONS'])
|
| 399 |
def check_url():
|
| 400 |
+
# Handle OPTIONS request for CORS
|
| 401 |
+
if request.method == 'OPTIONS':
|
| 402 |
+
return '', 200
|
| 403 |
+
|
| 404 |
try:
|
| 405 |
# Get JSON data from request
|
| 406 |
data = request.get_json()
|
|
|
|
| 442 |
return jsonify({
|
| 443 |
'status': 'healthy',
|
| 444 |
'message': 'Phishing detection API is running',
|
| 445 |
+
'models_loaded': rf_model is not None and xgb_model is not None and scaler is not None,
|
| 446 |
+
'numpy_version': np.__version__
|
| 447 |
})
|
| 448 |
|
| 449 |
+
# Root endpoint with API documentation
|
| 450 |
@app.route('/', methods=['GET'])
|
| 451 |
def root():
|
| 452 |
return jsonify({
|
| 453 |
'message': 'Phishing Detection API',
|
| 454 |
'version': '1.0.0',
|
| 455 |
+
'numpy_version': np.__version__,
|
| 456 |
'endpoints': {
|
| 457 |
'check': '/api/check (POST)',
|
| 458 |
'health': '/health (GET)'
|
| 459 |
},
|
| 460 |
'usage': {
|
| 461 |
'check': 'POST {"url": "https://example.com"} to /api/check'
|
| 462 |
+
},
|
| 463 |
+
'example': {
|
| 464 |
+
'request': {
|
| 465 |
+
'url': 'http://paypal.secure.login-update.com'
|
| 466 |
+
},
|
| 467 |
+
'response': {
|
| 468 |
+
'success': True,
|
| 469 |
+
'data': {
|
| 470 |
+
'url': 'http://paypal.secure.login-update.com',
|
| 471 |
+
'is_phishing': True,
|
| 472 |
+
'confidence': 'High',
|
| 473 |
+
'avg_phishing_probability': 0.9137
|
| 474 |
+
}
|
| 475 |
+
}
|
| 476 |
}
|
| 477 |
})
|
| 478 |
|
| 479 |
# Run the app
|
| 480 |
if __name__ == '__main__':
|
| 481 |
+
port = int(os.environ.get('PORT', 7860))
|
| 482 |
+
app.run(host='0.0.0.0', port=port)
|