SabarnaDeb committed on
Commit
e1bf645
·
verified ·
1 Parent(s): fd0bcb7

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +13 -0
  2. app.py +166 -0
  3. model_artifact.joblib +3 -0
  4. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Slim CPython 3.11 base image.
FROM python:3.11-slim

WORKDIR /app

# Install dependencies before copying the sources so this layer is reused
# from the build cache until requirements.txt itself changes.
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt

COPY . /app/

# Default listening port; override at run time with `-e PORT=<port>`.
ENV PORT=7860
EXPOSE 7860

# Run through `sh -c` so ${PORT} is expanded when the container starts
# (the exec form performs no variable substitution on its own). `exec`
# replaces the shell with gunicorn so it receives stop signals directly.
CMD ["sh", "-c", "exec gunicorn -b 0.0.0.0:${PORT} app:app"]
app.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import os
3
+ import joblib
4
+ import numpy as np
5
+ import pandas as pd
6
+ from flask import Flask, request, jsonify
7
+
8
# -----------------------
# Load serialized artifact
# -----------------------
# NOTE: joblib.load unpickles the file, so ARTIFACT_PATH must only ever point
# at a trusted artifact.
ARTIFACT_PATH = os.getenv("ARTIFACT_PATH", "model_artifact.joblib")
artifact = joblib.load(ARTIFACT_PATH)

# "model" and "feature_order" are indexed directly, so a missing key raises
# KeyError at import time; "cap_bounds" is optional and defaults to no capping.
model, feature_order = artifact["model"], artifact["feature_order"]
cap_bounds = artifact.get("cap_bounds", {})

# Optional security: the key is only enforced when API_KEY is set.
API_KEY = os.getenv("API_KEY")

app = Flask(__name__)
22
+
23
+
24
def apply_feature_engineering(df_raw: pd.DataFrame) -> pd.DataFrame:
    """
    Derive the engineered model features from a frame of raw records.

    Columns added:
    - profile_completed_score: Low/Medium/High -> 1/2/3
    - media_exposure_count: number of media/referral flags that are Yes
    - time_per_visit: time_spent_on_website / website_visits, or 0 where
      website_visits is not greater than 0 (including unparseable values)
    - total_page_views_est: website_visits * page_views_per_visit

    The source column profile_completed is dropped afterwards (training used
    the score instead).

    Categorical values are matched ignoring case and surrounding whitespace,
    so "yes", " Yes " and "YES" all count as Yes. Recognised flag values are
    stored back in their canonical "Yes"/"No" spelling; unrecognised values
    are kept as their string form and yield NaN in the derived columns.

    Args:
        df_raw: Raw records containing profile_completed, the five flag
            columns and the three website engagement columns.

    Returns:
        A new DataFrame; ``df_raw`` itself is not modified.

    Raises:
        KeyError: If one of the columns read here is absent.
    """
    df = df_raw.copy()

    # Profile mapping (keys are lower-case because values are normalised first)
    profile_map = {"low": 1, "medium": 2, "high": 3}
    profile_key = df["profile_completed"].astype(str).str.strip().str.lower()
    df["profile_completed_score"] = profile_key.map(profile_map)

    # Media exposure count from Yes/No columns
    flag_cols = [
        "print_media_type1",
        "print_media_type2",
        "digital_media",
        "educational_channels",
        "referral",
    ]
    yesno_map = {"yes": 1, "no": 0}
    flag_scores = []
    for col in flag_cols:
        as_text = df[col].astype(str)
        key = as_text.str.strip().str.lower()
        recognised = key.isin(list(yesno_map))
        # Canonicalise recognised values to "Yes"/"No"; anything else keeps
        # the plain string form it had before.
        df[col] = as_text.where(~recognised, key.str.capitalize())
        flag_scores.append(key.map(yesno_map))
    # NaN in any flag propagates, so an unrecognised value gives a NaN count.
    df["media_exposure_count"] = sum(flag_scores)

    # Engagement features: non-numeric input becomes NaN rather than raising
    for col in ("website_visits", "time_spent_on_website", "page_views_per_visit"):
        df[col] = pd.to_numeric(df[col], errors="coerce")

    visits = df["website_visits"]
    df["time_per_visit"] = np.where(visits > 0, df["time_spent_on_website"] / visits, 0)
    df["total_page_views_est"] = visits * df["page_views_per_visit"]

    # Drop original ordinal source column (because training used score)
    return df.drop(columns=["profile_completed"])
59
+
60
+
61
def apply_iqr_capping(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of ``df`` with each column listed in ``cap_bounds`` clipped.

    Every bounded column that exists in the frame is coerced to numeric
    (unparseable values become NaN) and clipped to its ``"low"``/``"high"``
    limits. Bounded columns that are not in the frame are ignored, and the
    input frame is left untouched.
    """
    capped = df.copy()
    for name in (bounded for bounded in cap_bounds if bounded in capped.columns):
        limits = cap_bounds[name]
        numeric = pd.to_numeric(capped[name], errors="coerce")
        capped[name] = numeric.clip(lower=limits["low"], upper=limits["high"])
    return capped
69
+
70
+
71
def validate_required_columns(df: pd.DataFrame) -> None:
    """
    Check that every raw input field is present as a column of ``df``.

    Raises:
        ValueError: Naming all absent fields, in the order they are listed
            below.
    """
    required = (
        "age",
        "current_occupation",
        "first_interaction",
        "profile_completed",
        "website_visits",
        "time_spent_on_website",
        "page_views_per_visit",
        "last_activity",
        "print_media_type1",
        "print_media_type2",
        "digital_media",
        "educational_channels",
        "referral",
    )
    available = df.columns
    missing = [field for field in required if field not in available]
    if not missing:
        return
    raise ValueError(f"Missing required fields: {missing}")
90
+
91
+
92
def build_model_input(df_raw: pd.DataFrame) -> pd.DataFrame:
    """
    Turn raw request records into the frame handed to the model.

    Steps: validate required fields, add engineered features, apply the
    saved clipping bounds, then select the columns in ``feature_order``.

    Raises:
        ValueError: If a required raw field is missing.
    """
    validate_required_columns(df_raw)
    prepared = apply_iqr_capping(apply_feature_engineering(df_raw))
    # reindex keeps only the expected features, in the expected order.
    return prepared.reindex(columns=feature_order)
102
+
103
+
104
def check_api_key(req):
    """
    Return True when the request is allowed to call protected routes.

    Authentication is optional: if ``API_KEY`` is None every request is
    accepted. Otherwise the ``x-api-key`` request header must equal the
    configured key.

    Args:
        req: Request object exposing a ``headers`` mapping with ``get``.

    Returns:
        bool: True if no key is configured or the supplied key matches.
    """
    # Imported locally so this function carries its own dependency.
    import hmac

    if API_KEY is None:
        return True

    supplied = req.headers.get("x-api-key")
    if supplied is None:
        return False

    # Constant-time comparison so response timing does not reveal how much of
    # the key matched. Compared as bytes because compare_digest rejects
    # non-ASCII str arguments.
    return hmac.compare_digest(supplied.encode("utf-8"), API_KEY.encode("utf-8"))
108
+
109
+
110
+ # -----------------------
111
+ # Routes
112
+ # -----------------------
113
@app.get("/health")
def health():
    """Health probe: always responds 200 with the JSON body {"status": "ok"}."""
    body = jsonify(status="ok")
    return body, 200
116
+
117
+
118
@app.post("/predict")
def predict():
    """
    Score one record (JSON object) or several records (JSON array of objects).

    Responses:
        200: {"predictions": [{"converted_prediction": 0 or 1,
              "conversion_probability": float}, ...]}, one entry per record
              in request order.
        400: body is not valid JSON, is not an object or array of objects,
              or processing raised ValueError (e.g. missing required fields).
        401: an API key is configured and the request did not supply it.
        500: any other failure while building features or predicting.
    """
    if not check_api_key(request):
        return jsonify({"error": "Unauthorized (invalid API key)"}), 401

    payload = request.get_json(silent=True)
    if payload is None:
        return jsonify({"error": "Invalid JSON"}), 400

    # Support single record (dict) OR multiple records (list of dicts).
    # Every list element is checked as well, so a malformed array is rejected
    # with a 400 instead of failing inside DataFrame construction.
    records = [payload] if isinstance(payload, dict) else payload
    if not isinstance(records, list) or not all(isinstance(r, dict) for r in records):
        return jsonify({"error": "Payload must be a dict or list of dicts"}), 400

    try:
        # Built inside the try so construction errors get a JSON response too.
        df_raw = pd.DataFrame(records)
        X_in = build_model_input(df_raw)

        # Predict probability and class
        if hasattr(model, "predict_proba"):
            # Column 1 is taken as the probability of the positive class.
            proba = model.predict_proba(X_in)[:, 1]
        else:
            # Fallback: use the hard prediction itself as the "probability".
            proba = model.predict(X_in).astype(float)

        pred = (proba >= 0.5).astype(int)

        out = [
            {
                "converted_prediction": int(label),
                "conversion_probability": float(probability),
            }
            for label, probability in zip(pred, proba)
        ]
        return jsonify({"predictions": out}), 200

    except ValueError as ve:
        return jsonify({"error": str(ve)}), 400
    except Exception as e:
        # Record the traceback server-side; without this the failure is only
        # visible to the caller.
        app.logger.exception("Prediction failed")
        # NOTE(review): "details" exposes internal exception text to clients.
        # It is kept for response compatibility; consider removing it.
        return jsonify({"error": "Internal server error", "details": str(e)}), 500
162
+
163
+
164
if __name__ == "__main__":
    # Direct execution: serve on all interfaces, on PORT from the environment
    # (7860 when unset).
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", "7860")), debug=False)
model_artifact.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5f96b4c197f918490d61b30f1990851da798df430b856d77190e46fb6173d91
3
+ size 8089
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ flask==3.0.3
2
+ gunicorn==22.0.0
3
+ joblib==1.4.2
4
+ numpy==2.0.1
5
+ pandas==2.2.2
6
+ scikit-learn==1.5.1