MHamzaShahid committed on
Commit
92c3eaa
·
verified ·
1 Parent(s): 885de0d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +150 -31
app.py CHANGED
@@ -1,50 +1,169 @@
1
  import sys
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  sys.modules['__main__'].temp_cat = temp_cat
3
  sys.modules['__main__'].proxy_humidity = proxy_humidity
4
  sys.modules['__main__'].CorrelationThresholdSelector = CorrelationThresholdSelector
5
 
6
- from fastapi import FastAPI
7
- from pydantic import BaseModel
8
- import joblib
9
- import pandas as pd
10
- import uvicorn
11
-
12
- # ✅ Initialize FastAPI app
13
- app = FastAPI(title="Crop Yield Predictor API", version="1.0")
14
 
15
- # Load your trained model
16
- model = joblib.load("CropYieldPredictor.pkl")
 
 
 
 
 
17
 
18
- # Define the expected input schema
19
  class CropInput(BaseModel):
20
- country: str
21
- crop_type: str
22
- year: int
23
- avg_rainfall: float
24
- pesticides: float
 
25
 
26
- # Home route
27
  @app.get("/")
28
  def home():
29
- return {"message": "Crop Yield Predictor API is running successfully!"}
30
 
31
- # ✅ Prediction route
32
  @app.post("/predict")
33
  def predict_yield(data: CropInput):
34
- # Convert input to DataFrame (must match training features order)
35
- input_df = pd.DataFrame([{
36
- "Country": data.country,
37
- "Crop": data.crop_type,
38
- "Year": data.year,
39
- "average_rain_fall_mm_per_year": data.avg_rainfall,
40
- "pesticides_tonnes": data.pesticides
41
- }])
42
 
43
- # Make prediction
44
- prediction = model.predict(input_df)[0]
 
 
 
45
 
46
- return {"predicted_yield": float(prediction)}
 
47
 
48
- # For local testing (won’t be used in HF)
49
  if __name__ == "__main__":
 
50
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
  import sys
2
+ import joblib
3
+ import pandas as pd
4
+ import numpy as np
5
+ from fastapi import FastAPI
6
+ from pydantic import BaseModel
7
+ from sklearn.preprocessing import FunctionTransformer, OrdinalEncoder, StandardScaler
8
+ from sklearn.impute import SimpleImputer
9
+ from sklearn.pipeline import make_pipeline
10
+ from sklearn.base import BaseEstimator, TransformerMixin
11
+ from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
12
+
13
+ # ========== 1️⃣ Define Custom Preprocessing Functions ==========
14
+
15
def temp_cat(X):
    """Return a frame with an added ``avg_temp_cat`` column binning ``avg_temp``.

    Parameters
    ----------
    X : pandas.DataFrame or any input accepted by ``pd.DataFrame(...)``
        Must expose an ``avg_temp`` column once it is a DataFrame.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the input columns plus the categorical column
        ``avg_temp_cat`` with labels ``very_cold`` / ``cold`` / ``warm`` /
        ``hot`` / ``very_hot``.

    Raises
    ------
    KeyError
        If the resulting frame has no ``avg_temp`` column.

    Notes
    -----
    ``pd.cut`` uses right-closed intervals by default, so the bands are
    (0, 5], (5, 10], (10, 20], (20, 30] and (30, inf). Temperatures at or
    below 0 fall outside every bin and are returned as NaN. The bin edges
    are left untouched on purpose: they must match whatever the persisted
    model was trained with.
    """
    # Copy DataFrame inputs so the caller's object is never modified as a
    # side effect (this also avoids pandas chained-assignment warnings when X
    # is a slice of another frame). Non-DataFrame inputs are wrapped instead.
    frame = X.copy() if isinstance(X, pd.DataFrame) else pd.DataFrame(X)
    frame['avg_temp_cat'] = pd.cut(
        frame['avg_temp'],
        bins=[0, 5, 10, 20, 30, np.inf],
        labels=['very_cold', 'cold', 'warm', 'hot', 'very_hot'],
    )
    return frame
31
+
32
def proxy_humidity(X):
    """Return a frame with an added ``proxy_humidity`` column.

    The new column is ``average_rain_fall_mm_per_year / (avg_temp + 1)``.

    Parameters
    ----------
    X : pandas.DataFrame or any input accepted by ``pd.DataFrame(...)``
        Must expose ``average_rain_fall_mm_per_year`` and ``avg_temp``
        columns once it is a DataFrame.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the input columns plus ``proxy_humidity``.

    Raises
    ------
    KeyError
        If either required column is missing.

    Notes
    -----
    The formula is kept exactly as written because it has to agree with the
    persisted model's training-time preprocessing. With pandas float columns
    an ``avg_temp`` of -1 produces ``inf`` (or NaN for 0 / 0) rather than an
    exception.
    """
    # Copy DataFrame inputs so the caller's object is never modified as a
    # side effect; non-DataFrame inputs are wrapped in a fresh frame.
    frame = X.copy() if isinstance(X, pd.DataFrame) else pd.DataFrame(X)
    frame["proxy_humidity"] = (
        frame["average_rain_fall_mm_per_year"] / (frame["avg_temp"] + 1)
    )
    return frame
40
+
41
+ # ========== 2️⃣ Define Custom Transformer Class ==========
42
+
43
class CorrelationThresholdSelector(BaseEstimator, TransformerMixin):
    """Drop low-variance features and all but one of each correlated group.

    Parameters
    ----------
    threshold : float, default=0.9
        Two features are grouped together when the absolute value of their
        pairwise correlation is strictly greater than this value.
    target_threshold : float, default=0.0
        When greater than 0, a correlated group whose best member has an
        absolute target correlation below this value is dropped entirely.
    method : str, default="pearson"
        Correlation method forwarded to ``DataFrame.corr`` / ``corrwith``.
    min_variance : float, default=0.0
        Features whose variance is less than or equal to this are dropped.

    Attributes set by ``fit``
    -------------------------
    n_features_in_ : int
    feature_names_in_ : ndarray of column names (``f0``, ``f1``, ... when the
        input has no ``columns`` attribute)
    features_to_drop_, selected_features_ : ndarray of int column positions
    selected_feature_names_, dropped_feature_names_ : list of names
    """

    def __init__(self, threshold=0.9, target_threshold=0.0, method="pearson", min_variance=0.0):
        self.threshold = threshold
        self.target_threshold = target_threshold
        self.method = method
        self.min_variance = min_variance

    def fit(self, X, y):
        """Decide which column positions to keep.

        ``X`` and ``y`` are validated with ``check_X_y`` and converted to
        float64. Returns ``self``.
        """
        X_original = X
        X_arr, y_arr = check_X_y(X, y, accept_sparse=False, dtype=np.float64)
        n_features = X_arr.shape[1]
        self.n_features_in_ = n_features

        if hasattr(X_original, "columns"):
            self.feature_names_in_ = np.asarray(X_original.columns)
        else:
            self.feature_names_in_ = np.array([f"f{i}" for i in range(n_features)])

        if n_features <= 1:
            # Nothing to compare against: keep the input as-is.
            self.features_to_drop_ = np.array([], dtype=int)
            self.selected_features_ = np.arange(n_features, dtype=int)
            # Populate the name attributes on this path too, so every fitted
            # instance exposes the same set of attributes.
            self.selected_feature_names_ = self.feature_names_in_[self.selected_features_].tolist()
            self.dropped_feature_names_ = self.feature_names_in_[self.features_to_drop_].tolist()
            return self

        X_df = pd.DataFrame(X_arr, columns=self.feature_names_in_)
        variances = X_df.var(numeric_only=True)
        low_var_mask = variances <= self.min_variance
        # A set gives O(1) membership tests inside the loop below.
        low_var_idx = set(np.where(low_var_mask)[0].tolist())

        # Absolute feature-feature correlations; the diagonal is zeroed so a
        # feature never counts as correlated with itself.
        corr_mat = X_df.corr(method=self.method).abs().values
        np.fill_diagonal(corr_mat, 0.0)

        # Absolute feature-target correlations; undefined values count as 0.
        y_series = pd.Series(y_arr)
        target_corr_series = X_df.corrwith(y_series, method=self.method).abs().fillna(0.0)
        target_corr = target_corr_series.values

        visited = set()
        drops = set()

        for i in range(n_features):
            if i in visited or i in low_var_idx:
                continue

            correlated_idx = set(np.where(corr_mat[i] > self.threshold)[0].tolist())
            cluster = {i} | correlated_idx
            visited |= cluster

            if len(cluster) == 1:
                continue

            # Keep the member most correlated with the target; ties are
            # broken by the larger variance.
            best = max(cluster, key=lambda idx: (target_corr[idx], X_df.iloc[:, idx].var()))

            if self.target_threshold > 0 and target_corr[best] < self.target_threshold:
                # Even the best member is too weakly related to the target.
                drops |= cluster
            else:
                cluster.remove(best)
                drops |= cluster

        drops |= low_var_idx
        self.features_to_drop_ = np.array(sorted(drops), dtype=int)
        retained = sorted(set(range(n_features)) - set(self.features_to_drop_))
        self.selected_features_ = np.array(retained, dtype=int)
        self.selected_feature_names_ = self.feature_names_in_[self.selected_features_].tolist()
        self.dropped_feature_names_ = self.feature_names_in_[self.features_to_drop_].tolist()
        return self

    def transform(self, X):
        """Return the float64 array of ``X`` restricted to the kept columns.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called.
        ValueError
            If ``X`` does not have the number of columns seen during ``fit``.
        """
        check_is_fitted(self, "selected_features_")
        X_arr = check_array(X, accept_sparse=False, dtype=np.float64)

        # Positional selection is only meaningful when the column count
        # matches what fit saw; otherwise wrong columns could be returned
        # silently. getattr keeps instances lacking the attribute usable.
        expected = getattr(self, "n_features_in_", None)
        if expected is not None and X_arr.shape[1] != expected:
            raise ValueError(
                f"X has {X_arr.shape[1]} features, but "
                f"{type(self).__name__} was fitted with {expected} features."
            )

        if self.selected_features_.size == 0:
            return np.empty((X_arr.shape[0], 0), dtype=X_arr.dtype)
        sel = np.asarray(self.selected_features_, dtype=int)
        return X_arr[:, sel]
115
+
116
+
117
# ========== 3️⃣ Register them for joblib to find ==========
# Expose the custom helpers as attributes of the ``__main__`` module.
# NOTE(review): presumably the pickle was produced by a script/notebook in
# which these objects lived in ``__main__`` - confirm against the training code.
vars(sys.modules['__main__']).update(
    temp_cat=temp_cat,
    proxy_humidity=proxy_humidity,
    CorrelationThresholdSelector=CorrelationThresholdSelector,
)
121
 
122
# ========== 4️⃣ Initialize FastAPI ==========
app = FastAPI(title="🌾 Crop Yield Predictor API", version="1.0")


# ========== 5️⃣ Load Trained Model ==========
def _load_model(path):
    """Load the persisted estimator at *path*; return ``None`` on any failure.

    The outcome is reported on stdout either way, so a missing or broken
    model file does not stop the module from importing.

    NOTE: ``joblib.load`` unpickles the file, which can execute arbitrary
    code - only point this at a trusted artifact.
    """
    try:
        loaded = joblib.load(path)
        print("✅ Model loaded successfully!")
        return loaded
    except Exception as e:
        print(f"❌ Error loading model: {e}")
        return None


model = _load_model("CropYieldPredictor.pkl")
132
 
133
# ========== 6️⃣ Define Input Schema ==========
class CropInput(BaseModel):
    """JSON body accepted by ``POST /predict``.

    The prediction handler turns an instance into a one-row DataFrame whose
    columns are exactly these field names, so the names (and their casing)
    need to match what the persisted model expects.
    """

    # NOTE(review): presumably the country/region name - confirm against the
    # training data.
    Area: str
    # NOTE(review): presumably the crop name - confirm against the training data.
    Item: str
    Year: int
    # Unit taken from the field name: millimetres per year.
    average_rain_fall_mm_per_year: float
    # Unit taken from the field name: tonnes.
    pesticides_tonnes: float
    # NOTE(review): temperature unit is not stated anywhere in this file.
    avg_temp: float
141
 
142
# ========== 7️⃣ Routes ==========
@app.get("/")
def home():
    """Liveness endpoint: return a static message showing the API is up."""
    return {"message": "🌾 Crop Yield Predictor API is live and running!"}


@app.post("/predict")
def predict_yield(data: CropInput):
    """Predict crop yield for a single observation.

    Parameters
    ----------
    data : CropInput
        Validated request body; its fields become the columns of a one-row
        DataFrame handed to ``model.predict``.

    Returns
    -------
    dict
        On success: ``predicted_yield_hg_per_ha`` (the raw model output),
        ``predicted_yield_kg_per_ha`` (raw output * 0.1, since 1 hg = 0.1 kg)
        and ``message``.
        On failure: an ``error`` entry (plus ``message`` when prediction
        itself failed). Failures are reported in the response body rather
        than raised, so the HTTP status stays 200.
    """
    if model is None:
        return {"error": "Model not loaded properly!"}

    try:
        # Pydantic v2 renamed ``.dict()`` to ``.model_dump()`` and deprecated
        # the old spelling; use the new one when present and fall back so the
        # handler still works on Pydantic v1.
        payload = data.model_dump() if hasattr(data, "model_dump") else data.dict()
        input_df = pd.DataFrame([payload])
        prediction = model.predict(input_df)[0]
        predicted_yield_kg_ha = prediction * 0.1

        return {
            "predicted_yield_hg_per_ha": float(prediction),
            "predicted_yield_kg_per_ha": float(predicted_yield_kg_ha),
            "message": "✅ Prediction successful!"
        }

    except Exception as e:
        # Deliberate catch-all at the request boundary: any preprocessing or
        # model error is turned into an error payload instead of a 500.
        return {"error": str(e), "message": "❌ Prediction failed due to preprocessing or feature mismatch."}
165
 
166
# ========== 8️⃣ Local Run ==========
def _serve_locally():
    """Start a uvicorn server for ``app`` on all interfaces, port 7860."""
    # Imported lazily so uvicorn is only needed when the file is run directly.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)


if __name__ == "__main__":
    _serve_locally()