Spaces:
Runtime error
Runtime error
| import sys | |
| import os | |
| import time | |
| import json | |
| import logging | |
| import joblib | |
| import numpy as np | |
| import pandas as pd | |
| from flask import Flask, request, jsonify | |
| from sklearn.pipeline import Pipeline | |
| from features.feature_builder import build_features | |
| from schemas.request_schema import PredictRequest | |
| # ====================== | |
| # PATH SETUP | |
| # ====================== | |
# Absolute directory of this file; model artifacts are resolved against it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Give the project directory top priority on the import path.
# NOTE(review): the project imports at the top of the file execute before this
# line runs, so they do not benefit from it -- confirm the ordering is intended.
sys.path.insert(0, BASE_DIR)

# ======================
# APP INIT
# ======================
app = Flask(__name__)

# ======================
# LOGGING
# ======================
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# ======================
# SECURITY CONFIG
# ======================
# Shared secret read from the environment; None when the variable is unset.
API_KEY = os.environ.get("FRAUD_API_KEY")
# Largest accepted request body, in bytes.
MAX_REQUEST_SIZE = 10_000  # 10 KB
# At most RATE_LIMIT requests per client within RATE_LIMIT_WINDOW seconds.
RATE_LIMIT = 30
RATE_LIMIT_WINDOW = 60  # seconds
# In-process store: client IP -> list of accepted request timestamps.
rate_limit_store = {}
def is_rate_limited(client_ip: str) -> bool:
    """Return True when ``client_ip`` has used up its request budget.

    A client may make at most ``RATE_LIMIT`` requests inside any sliding
    window of ``RATE_LIMIT_WINDOW`` seconds. An accepted request is recorded
    in ``rate_limit_store``; a rejected one is not.

    Idle clients are evicted from ``rate_limit_store`` at most once per
    window, so the store no longer keeps a key for every address ever seen.

    Args:
        client_ip: Key identifying the caller (the handler passes the remote
            address, or ``"unknown"``).

    Returns:
        True if the request must be rejected, False if it was accepted.
    """
    now = time.time()

    # Periodic sweep: drop clients whose newest timestamp has left the window.
    # Iterating over a snapshot keeps the loop safe while keys are removed.
    if now - is_rate_limited.last_sweep >= RATE_LIMIT_WINDOW:
        is_rate_limited.last_sweep = now
        for ip, stamps in list(rate_limit_store.items()):
            if not stamps or now - stamps[-1] >= RATE_LIMIT_WINDOW:
                rate_limit_store.pop(ip, None)

    # Keep only this client's timestamps that are still inside the window.
    recent = [
        t for t in rate_limit_store.get(client_ip, ())
        if now - t < RATE_LIMIT_WINDOW
    ]
    limited = len(recent) >= RATE_LIMIT
    if not limited:
        recent.append(now)
    rate_limit_store[client_ip] = recent
    return limited


# Time of the last idle-client sweep; 0.0 forces a sweep on the first call.
is_rate_limited.last_sweep = 0.0
| # ====================== | |
| # GLOBAL API KEY GUARD | |
| # ====================== | |
@app.before_request
def check_api_key():
    """Global API-key guard, run by Flask before every request.

    The key check is currently DISABLED: the function always returns None,
    which tells Flask to continue with normal request handling. The disabled
    logic is kept below as comments so it can be restored deliberately.

    The hook is registered with ``before_request`` so that restoring the
    checks actually takes effect; while they stay disabled, the registration
    does not change any response.

    Returns:
        None, always.
    """
    # Health endpoint is public
    # if request.path == "/health":
    #     return
    # # Skip static files if any
    # if request.path.startswith("/static"):
    #     return
    # if not API_KEY:
    #     logging.error("FRAUD_API_KEY environment variable not set")
    #     return jsonify({"error": "Server misconfigured"}), 500
    # client_key = request.headers.get("X-API-KEY")
    # if not client_key or client_key != API_KEY:
    #     return jsonify({"error": "Unauthorized"}), 401
    return None
| # ====================== | |
| # LOAD MODEL & PREPROCESSOR | |
| # ====================== | |
# Locations of the serialized classifier and its fitted preprocessor.
MODEL_PATH = os.path.join(BASE_DIR, "models", "ensemble_model_enhanced.joblib")
PREPROCESSOR_PATH = os.path.join(BASE_DIR, "models", "preprocessor_enhanced.joblib")

# Fail fast at import time, model first, if either artifact is missing.
for _label, _artifact_path in (
    ("Model", MODEL_PATH),
    ("Preprocessor", PREPROCESSOR_PATH),
):
    if not os.path.exists(_artifact_path):
        raise FileNotFoundError(f"{_label} tidak ditemukan: {_artifact_path}")

# NOTE: joblib.load unpickles the file and can execute arbitrary code; only
# load artifacts that ship with this project.
model = joblib.load(MODEL_PATH)
preprocessor = joblib.load(PREPROCESSOR_PATH)
# NOTE(review): no route registration was present in the reviewed source, so
# this handler was unreachable. The "/ping" path is an assumption -- confirm.
@app.route("/ping", methods=["GET"])
def ping():
    """Liveness probe: return a constant payload without touching the model."""
    return {"ping": "pong"}
# Preprocessor and classifier chained as one scikit-learn pipeline.
pipeline_model = Pipeline(
    steps=[
        ("preprocess", preprocessor),
        ("classifier", model),
    ]
)

# Fraud probability at or above this value is treated as fraud.
THRESHOLD = 0.6
| # ====================== | |
| # HEALTH CHECK | |
| # ====================== | |
@app.route("/health", methods=["GET"])
def health():
    """Report service status as JSON.

    Registered at ``/health``, the path the API-key guard names as the public
    health endpoint; without the registration the handler was unreachable.

    Returns:
        A JSON response with ``status``, ``model_loaded`` and the current
        Unix ``timestamp``.
    """
    return jsonify({
        "status": "ok",
        "model_loaded": model is not None,
        "timestamp": time.time(),
    })
| # ====================== | |
| # PREDICT | |
| # ====================== | |
# NOTE(review): no route registration was present in the reviewed source, so
# this handler was unreachable. POST "/predict" is an assumption -- confirm
# the path against existing clients.
@app.route("/predict", methods=["POST"])
def predict():
    """Score one transaction and return the fraud decision as JSON.

    Steps, in order: request-size check, per-client rate limit, schema
    validation through ``PredictRequest``, business-rule validation of
    ``amount`` and ``location``, feature building, preprocessing and
    classification, then mapping the probability to a decision.

    Returns:
        On success, a JSON body with ``fraud_probability``, ``is_fraud``,
        ``decision`` (``"BLOCK"``, ``"REVIEW"`` or ``"ALLOW"``) and
        ``latency_ms``. On failure, a JSON error with status 413 (body too
        large), 429 (rate limited), 422 (schema), 400 (business rule) or
        500 (feature building / prediction).
    """
    # Monotonic clock: latency is not distorted by wall-clock adjustments.
    started = time.perf_counter()

    # ---------- REQUEST SIZE ----------
    if request.content_length and request.content_length > MAX_REQUEST_SIZE:
        return jsonify({"error": "Request too large"}), 413

    # ---------- RATE LIMIT ----------
    client_ip = request.remote_addr or "unknown"
    if is_rate_limited(client_ip):
        return jsonify({"error": "Too many requests"}), 429

    # ---------- PARSE & VALIDATE ----------
    # Deliberately broad: a missing/non-JSON body, a non-object payload and a
    # schema violation are all reported to the client as 422.
    try:
        payload = request.get_json()
        req = PredictRequest(**payload)
        data = req.model_dump()
    except Exception as e:
        return jsonify({
            "error": "Invalid request schema",
            "detail": str(e)
        }), 422
    logging.info("Request valid: %s", data)

    # ---------- BUSINESS VALIDATION ----------
    amount = data.get("amount", 0)
    location = data.get("location", -1)

    # The positive-form comparisons also reject NaN; a None or otherwise
    # non-comparable value becomes a 400 instead of an unhandled TypeError.
    try:
        amount_ok = 0 < amount <= 100_000_000
    except TypeError:
        amount_ok = False
    if not amount_ok:
        return jsonify({"error": "Invalid amount value"}), 400

    try:
        location_ok = location >= 0
    except TypeError:
        location_ok = False
    if not location_ok:
        return jsonify({"error": "Invalid location value"}), 400

    # ---------- FEATURE BUILD ----------
    try:
        X_df = build_features(data)
        logging.info("Feature DF: %s", X_df.to_dict(orient="records"))
    except Exception as e:
        # logging.exception keeps the traceback for diagnosis.
        logging.exception("Feature building error: %s", e)
        return jsonify({
            "error": "Feature building error",
            "detail": str(e)
        }), 500

    # ---------- PREDICT ----------
    try:
        X = preprocessor.transform(X_df)
        # Single-row input: take column 1 of predict_proba's first row.
        fraud_prob = model.predict_proba(X)[0][1]
    except Exception as e:
        logging.exception("Prediction error: %s", e)
        return jsonify({
            "error": "Preprocessing or prediction error",
            "detail": str(e)
        }), 500

    # ---------- DECISION ----------
    is_fraud = fraud_prob >= THRESHOLD
    if fraud_prob >= 0.85:
        decision = "BLOCK"
    elif is_fraud:
        decision = "REVIEW"
    else:
        decision = "ALLOW"

    latency_ms = round((time.perf_counter() - started) * 1000, 2)

    # ---------- STRUCTURED LOG ----------
    logging.info({
        "event": "fraud_decision",
        "fraud_probability": float(fraud_prob),
        "decision": decision,
        "threshold": THRESHOLD,
        "latency_ms": latency_ms
    })

    return jsonify({
        "fraud_probability": round(float(fraud_prob), 4),
        "is_fraud": bool(is_fraud),
        "decision": decision,
        "latency_ms": latency_ms
    })
| # ====================== | |
| # RUN SERVER | |
| # ====================== | |
if __name__ == "__main__":
    # Listen on every interface; the port comes from PORT, defaulting to 7860.
    listen_port = int(os.getenv("PORT", 7860))
    app.run(host="0.0.0.0", port=listen_port, debug=False)