Spaces:

MHamzaShahid
/

Crop-Yield-Predictor-API

Sleeping

App Files Files Community

MHamzaShahid committed on Oct 7, 2025

Commit

a3c60c5

verified ·

1 Parent(s): 92c3eaa

Update app.py

Browse files

Files changed (1) hide show

app.py +84 -16

app.py CHANGED Viewed

@@ -10,7 +10,9 @@ from sklearn.pipeline import make_pipeline
 from sklearn.base import BaseEstimator, TransformerMixin
 from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
-# ========== 1️⃣ Define Custom Preprocessing Functions ==========
 def temp_cat(X):
     if isinstance(X, pd.DataFrame):
@@ -29,6 +31,14 @@ def temp_cat(X):
         )
         return X
 def proxy_humidity(X):
     if isinstance(X, pd.DataFrame):
         X["proxy_humidity"] = X["average_rain_fall_mm_per_year"] / (X["avg_temp"] + 1)
@@ -38,7 +48,49 @@ def proxy_humidity(X):
         X["proxy_humidity"] = X["average_rain_fall_mm_per_year"] / (X["avg_temp"] + 1)
         return X
-# ========== 2️⃣ Define Custom Transformer Class ==========
 class CorrelationThresholdSelector(BaseEstimator, TransformerMixin):
     def __init__(self, threshold=0.9, target_threshold=0.0, method="pearson", min_variance=0.0):
@@ -75,22 +127,17 @@ class CorrelationThresholdSelector(BaseEstimator, TransformerMixin):
         target_corr_series = X_df.corrwith(y_series, method=self.method).abs().fillna(0.0)
         target_corr = target_corr_series.values
-        visited = set()
-        drops = set()
         for i in range(n_features):
             if i in visited or i in low_var_idx:
                 continue
             correlated_idx = set(np.where(corr_mat[i] > self.threshold)[0].tolist())
             cluster = {i} | correlated_idx
             visited |= cluster
             if len(cluster) == 1:
                 continue
             best = max(cluster, key=lambda idx: (target_corr[idx], X_df.iloc[:, idx].var()))
             if self.target_threshold > 0 and target_corr[best] < self.target_threshold:
                 drops |= cluster
             else:
@@ -114,15 +161,24 @@ class CorrelationThresholdSelector(BaseEstimator, TransformerMixin):
         return X_arr[:, sel]
-# ========== 3️⃣ Register them for joblib to find ==========
 sys.modules['__main__'].temp_cat = temp_cat
 sys.modules['__main__'].proxy_humidity = proxy_humidity
 sys.modules['__main__'].CorrelationThresholdSelector = CorrelationThresholdSelector
-# ========== 4️⃣ Initialize FastAPI ==========
 app = FastAPI(title="🌾 Crop Yield Predictor API", version="1.0")
-# ========== 5️⃣ Load Trained Model ==========
 try:
     model = joblib.load("CropYieldPredictor.pkl")
     print("✅ Model loaded successfully!")
@@ -130,7 +186,10 @@ except Exception as e:
     print(f"❌ Error loading model: {e}")
     model = None
-# ========== 6️⃣ Define Input Schema ==========
 class CropInput(BaseModel):
     Area: str
     Item: str
@@ -139,11 +198,15 @@ class CropInput(BaseModel):
     pesticides_tonnes: float
     avg_temp: float
-# ========== 7️⃣ Routes ==========
 @app.get("/")
 def home():
     return {"message": "🌾 Crop Yield Predictor API is live and running!"}
 @app.post("/predict")
 def predict_yield(data: CropInput):
     if model is None:
@@ -159,11 +222,16 @@ def predict_yield(data: CropInput):
             "predicted_yield_kg_per_ha": float(predicted_yield_kg_ha),
             "message": "✅ Prediction successful!"
         }
     except Exception as e:
-        return {"error": str(e), "message": "❌ Prediction failed due to preprocessing or feature mismatch."}
-# ========== 8️⃣ Local Run ==========
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)

 from sklearn.base import BaseEstimator, TransformerMixin
 from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
+# ================================
+# 1️⃣ Custom Preprocessing Functions
+# ================================
 def temp_cat(X):
     if isinstance(X, pd.DataFrame):
         )
         return X
+def clean(X):
+    if isinstance(X, pd.DataFrame):
+        return X.dropna()
+    else:
+        return pd.DataFrame(X).dropna()
 def proxy_humidity(X):
     if isinstance(X, pd.DataFrame):
         X["proxy_humidity"] = X["average_rain_fall_mm_per_year"] / (X["avg_temp"] + 1)
         X["proxy_humidity"] = X["average_rain_fall_mm_per_year"] / (X["avg_temp"] + 1)
         return X
+# ================================
+# 2️⃣ Transformers and Pipelines
+# ================================
+temp_cat_transformer = FunctionTransformer(temp_cat)
+temp_cat_pipeline = make_pipeline(
+    temp_cat_transformer,
+    OrdinalEncoder(
+        handle_unknown='use_encoded_value',
+        unknown_value=-1
+    )
+)
+clean_transformer = FunctionTransformer(clean)
+clean_pipeline = make_pipeline(
+    clean_transformer,
+    StandardScaler()
+)
+cat_pipeline = make_pipeline(
+    SimpleImputer(strategy="most_frequent"),
+    OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
+)
+proxy_humidity_transformer = FunctionTransformer(proxy_humidity)
+proxy_humidity_pipeline = make_pipeline(
+    proxy_humidity_transformer,
+    StandardScaler()
+)
+square_transformer = FunctionTransformer(np.square)
+square_pipeline = make_pipeline(square_transformer, StandardScaler())
+log_transformer = FunctionTransformer(np.log1p)
+log_pipeline = make_pipeline(log_transformer, StandardScaler())
+default_num_pipeline = make_pipeline(StandardScaler())
+# ================================
+# 3️⃣ Custom Feature Selector
+# ================================
 class CorrelationThresholdSelector(BaseEstimator, TransformerMixin):
     def __init__(self, threshold=0.9, target_threshold=0.0, method="pearson", min_variance=0.0):
         target_corr_series = X_df.corrwith(y_series, method=self.method).abs().fillna(0.0)
         target_corr = target_corr_series.values
+        visited, drops = set(), set()
         for i in range(n_features):
             if i in visited or i in low_var_idx:
                 continue
             correlated_idx = set(np.where(corr_mat[i] > self.threshold)[0].tolist())
             cluster = {i} | correlated_idx
             visited |= cluster
             if len(cluster) == 1:
                 continue
             best = max(cluster, key=lambda idx: (target_corr[idx], X_df.iloc[:, idx].var()))
             if self.target_threshold > 0 and target_corr[best] < self.target_threshold:
                 drops |= cluster
             else:
         return X_arr[:, sel]
+# ================================
+# 4️⃣ Register All Functions for joblib
+# ================================
 sys.modules['__main__'].temp_cat = temp_cat
+sys.modules['__main__'].clean = clean
 sys.modules['__main__'].proxy_humidity = proxy_humidity
 sys.modules['__main__'].CorrelationThresholdSelector = CorrelationThresholdSelector
+# ================================
+# 5️⃣ Initialize FastAPI
+# ================================
 app = FastAPI(title="🌾 Crop Yield Predictor API", version="1.0")
+# ================================
+# 6️⃣ Load Model
+# ================================
 try:
     model = joblib.load("CropYieldPredictor.pkl")
     print("✅ Model loaded successfully!")
     print(f"❌ Error loading model: {e}")
     model = None
+# ================================
+# 7️⃣ Define Input Schema
+# ================================
 class CropInput(BaseModel):
     Area: str
     Item: str
     pesticides_tonnes: float
     avg_temp: float
+# ================================
+# 8️⃣ Routes
+# ================================
 @app.get("/")
 def home():
     return {"message": "🌾 Crop Yield Predictor API is live and running!"}
 @app.post("/predict")
 def predict_yield(data: CropInput):
     if model is None:
             "predicted_yield_kg_per_ha": float(predicted_yield_kg_ha),
             "message": "✅ Prediction successful!"
         }
     except Exception as e:
+        return {
+            "error": str(e),
+            "message": "❌ Prediction failed due to preprocessing or feature mismatch."
+        }
+# ================================
+# 9️⃣ Local Run
+# ================================
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)