Rizwan9 committed on
Commit
d5c01b2
·
verified ·
1 Parent(s): 442d3a8

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +28 -134
app.py CHANGED
@@ -1,148 +1,42 @@
1
- from flask import Flask, request, jsonify, make_response
2
- import joblib, pandas as pd, numpy as np, os, sys, time, traceback
3
- from sklearn.pipeline import Pipeline
4
 
5
- app = Flask(__name__)
6
-
7
- # -----------------------------
8
- # Config & Model Loading
9
- # -----------------------------
10
- MODEL_PATH = os.getenv("MODEL_PATH", "best_model_random_forest.joblib")
11
- PORT = int(os.getenv("PORT", "7860"))
12
-
13
- print(f"==> [BOOT] Starting Flask app on port {PORT}")
14
- print(f"==> [BOOT] MODEL_PATH={MODEL_PATH}", flush=True)
15
-
16
- def load_pipeline(path: str):
17
- t0 = time.time()
18
- if not os.path.exists(path):
19
- raise FileNotFoundError(f"Model file not found: {path}")
20
- print(f"==> [LOAD] Loading model from {path} ...", flush=True)
21
- obj = joblib.load(path)
22
- if isinstance(obj, dict) and "pipeline" in obj:
23
- pipe = obj["pipeline"]
24
- print("==> [LOAD] Loaded dict bundle with 'pipeline'", flush=True)
25
- else:
26
- pipe = obj
27
- print("==> [LOAD] Loaded pipeline object (no bundle key)", flush=True)
28
- print(f"==> [LOAD] Done in {time.time()-t0:.2f}s", flush=True)
29
- return pipe
30
-
31
- try:
32
- pipe = load_pipeline(MODEL_PATH)
33
- MODEL_READY, LOAD_ERROR = True, None
34
- except Exception as e:
35
- pipe, MODEL_READY, LOAD_ERROR = None, False, str(e)
36
- print("==> [ERROR] Model load failed:", LOAD_ERROR, file=sys.stderr, flush=True)
37
 
38
- # -----------------------------
39
- # Utils
40
- # -----------------------------
41
- def sanitize_inputs(df: pd.DataFrame) -> pd.DataFrame:
42
- df = df.copy()
43
- # Strings → Title-case (fixes 'low sugar' vs 'Low Sugar'), trimmed
44
- for col in df.select_dtypes(include="object").columns:
45
- df[col] = df[col].astype(str).str.strip().str.title()
46
- # Numerics → coerce
47
- for col in df.columns:
48
- if df[col].dtype.kind in "biufc": # numeric-like
49
- df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)
50
- return df
51
-
52
- def expected_feature_names():
53
- # scikit-learn >=1.0 usually exposes this on the fitted estimator or pipeline
54
- names = getattr(pipe, "feature_names_in_", None)
55
- if names is not None:
56
- return list(names)
57
- # Fallback: try to infer from first step if it’s a Pipeline
58
- if isinstance(pipe, Pipeline):
59
- first = pipe.steps[0][1]
60
- names = getattr(first, "feature_names_in_", None)
61
- if names is not None:
62
- return list(names)
63
- return None # unknown
64
-
65
- # -----------------------------
66
- # CORS (no dependency)
67
- # -----------------------------
68
- @app.after_request
69
- def add_cors_headers(resp):
70
- resp.headers["Access-Control-Allow-Origin"] = "*"
71
- resp.headers["Access-Control-Allow-Headers"] = "Content-Type, Authorization"
72
- resp.headers["Access-Control-Allow-Methods"] = "GET, POST, OPTIONS"
73
- return resp
74
-
75
- # -----------------------------
76
- # Basic routes
77
- # -----------------------------
78
- @app.get("/")
79
- def root():
80
- return jsonify({"service": "SuperKart Sales Forecast API",
81
- "health": "/health", "predict": "/predict", "schema": "/schema",
82
- "model_path": MODEL_PATH})
83
 
84
- @app.get("/health")
85
- def health():
86
- if MODEL_READY:
87
- return jsonify({"status": "ok", "model_path": MODEL_PATH}), 200
88
- return jsonify({"status": "error", "error": LOAD_ERROR, "model_path": MODEL_PATH}), 500
89
 
90
- @app.get("/schema")
91
- def schema():
92
- info = {
93
- "model_ready": MODEL_READY,
94
- "model_path": MODEL_PATH,
95
- "estimator_type": type(pipe).__name__ if pipe is not None else None,
96
- "expected_feature_names": expected_feature_names()
97
- }
98
- return jsonify(info), 200 if MODEL_READY else 500
99
 
100
- @app.route("/predict", methods=["OPTIONS"])
101
- def predict_options():
102
- return make_response(("", 204))
103
 
104
- # -----------------------------
105
- # Predict
106
- # -----------------------------
107
- @app.post("/predict")
108
  def predict():
109
- if not MODEL_READY or pipe is None:
110
- return jsonify({"error": "Model not loaded", "details": LOAD_ERROR}), 503
111
-
112
  try:
113
- payload = request.get_json(force=True)
114
- if payload is None:
115
- return jsonify({"error": "No JSON received"}), 400
 
116
 
117
- df = pd.DataFrame([payload]) if isinstance(payload, dict) else pd.DataFrame(payload)
118
- df = sanitize_inputs(df)
119
 
120
- # If the estimator exposes expected input feature names, validate quickly
121
- expected = expected_feature_names()
122
- if expected:
123
- missing = [c for c in expected if c not in df.columns]
124
- extra = [c for c in df.columns if c not in expected]
125
- if missing:
126
- return jsonify({
127
- "error": "Missing required columns",
128
- "missing": missing,
129
- "expected": expected,
130
- "received": list(df.columns)
131
- }), 400
132
- if extra:
133
- # Not fatal, but good to know
134
- print(f"==> [WARN] Extra columns received that model does not expect: {extra}", flush=True)
135
- # Align column order if needed
136
- df = df[expected]
137
 
138
- preds = pipe.predict(df)
139
- preds = [float(p) if isinstance(p, (np.floating, float, int)) else p for p in preds]
140
- return jsonify({"predictions": preds, "rows_received": len(df)})
141
 
 
 
142
  except Exception as e:
143
- print("==> [ERROR] Prediction failed:\n", traceback.format_exc(), flush=True)
144
- return jsonify({"error": "Prediction failed", "details": str(e)}), 500
145
 
146
- if __name__ == "__main__":
147
- print("==> [RUN] Flask dev server starting...", flush=True)
148
- app.run(host="0.0.0.0", port=PORT)
 
 
 
 
1
 
2
+ from flask import Flask, request, jsonify
3
+ import joblib
4
+ import pandas as pd
5
+ import numpy as np
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
+ app = Flask(__name__)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
+ # Load the serialized model bundle
10
+ BUNDLE_FILENAME = 'best_model_random_forest.joblib'
11
+ bundle = joblib.load(BUNDLE_FILENAME)
12
+ model = bundle['model']
13
+ feature_cols = bundle['feature_cols']
14
 
 
 
 
 
 
 
 
 
 
15
 
16
+ @app.route('/')
17
+ def home():
18
+ return "Sales Forecasting Backend is running!"
19
 
20
+ @app.route('/predict', methods=['POST'])
 
 
 
21
  def predict():
 
 
 
22
  try:
23
+ data = request.get_json(force=True)
24
+ # Convert the incoming data to a pandas DataFrame
25
+ # Assuming the incoming data is a list of dictionaries, where each dictionary is a data point
26
+ input_data = pd.DataFrame(data)
27
 
28
+ # Align columns with the training data, adding missing columns with a default value (e.g., 0 or NaN)
29
+ input_data_processed = input_data.reindex(columns=feature_cols, fill_value=0)
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
+ # Make predictions
33
+ predictions = model.predict(input_data_processed)
 
34
 
35
+ # Return predictions as a JSON response
36
+ return jsonify(predictions.tolist())
37
  except Exception as e:
38
+ return jsonify({'error': str(e)})
 
39
 
40
+ if __name__ == '__main__':
41
+ # Running on 0.0.0.0 makes it accessible externally, useful for deployment
42
+ app.run(host='0.0.0.0', port=5000)