Rick committed on
Commit
fec2ed4
·
verified ·
1 Parent(s): 35ae7f9

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +417 -0
app.py ADDED
@@ -0,0 +1,417 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import streamlit as st
3
+ import pickle
4
+ import pandas as pd
5
+ import numpy as np
6
+ from sklearn.preprocessing import FunctionTransformer, OrdinalEncoder, StandardScaler
7
+ from sklearn.impute import SimpleImputer
8
+ from sklearn.pipeline import make_pipeline
9
+ from sklearn.base import BaseEstimator, TransformerMixin
10
+ from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
11
+ import os
12
+ import warnings
13
+ warnings.filterwarnings('ignore')
14
+
15
+ # ======== ALL PREPROCESSING FUNCTIONS AND PIPELINES ========
16
+
17
def temp_cat(X):
    """Return a copy of ``X`` with an ``avg_temp_cat`` temperature-bucket column.

    The ``avg_temp`` column is bucketed with ``pd.cut`` into the right-closed
    intervals (0, 5], (5, 10], (10, 20], (20, 30], (30, inf), labelled
    ``very_cold``, ``cold``, ``warm``, ``hot`` and ``very_hot``. Values at or
    below 0 fall outside every bin and therefore become NaN.

    Parameters
    ----------
    X : pandas.DataFrame or array-like
        Input data. Anything that is not a DataFrame is wrapped with
        ``pd.DataFrame(X)``; the result must still expose a column labelled
        ``'avg_temp'`` (a bare ndarray gets integer labels and raises
        ``KeyError``).

    Returns
    -------
    pandas.DataFrame
        A new frame holding the original columns plus ``avg_temp_cat``.
        The caller's object is left untouched.
    """
    # Work on a copy: assigning a column on the caller's frame would leak the
    # derived feature into data that is reused after the transform.
    frame = X.copy() if isinstance(X, pd.DataFrame) else pd.DataFrame(X)
    frame['avg_temp_cat'] = pd.cut(
        frame['avg_temp'],
        bins=[0, 5, 10, 20, 30, np.inf],
        labels=['very_cold', 'cold', 'warm', 'hot', 'very_hot'],
    )
    return frame
25
+
26
# Transformers and pipelines built at import time.
# Temperature bucketing followed by ordinal encoding; categories not seen
# during fit are encoded as -1.
temp_cat_transformer = FunctionTransformer(func=temp_cat)
temp_cat_pipeline = make_pipeline(
    temp_cat_transformer,
    OrdinalEncoder(unknown_value=-1, handle_unknown='use_encoded_value'),
)
32
+
33
def clean(X):
    """Return ``X`` as a DataFrame with every row containing a missing value removed.

    A DataFrame input is used as-is; any other input is first wrapped with
    ``pd.DataFrame``. The result of ``dropna()`` is a new frame, so the input
    itself is not modified.
    """
    frame = X if isinstance(X, pd.DataFrame) else pd.DataFrame(X)
    return frame.dropna()
38
+
39
# Drop incomplete rows, then standardise the remaining values.
clean_transformer = FunctionTransformer(func=clean)
clean_pipeline = make_pipeline(clean_transformer, StandardScaler())

# Categorical columns: fill gaps with the most frequent value, then
# ordinal-encode; categories not seen during fit are encoded as -1.
cat_pipeline = make_pipeline(
    SimpleImputer(strategy='most_frequent'),
    OrdinalEncoder(unknown_value=-1, handle_unknown='use_encoded_value'),
)
46
+
47
def proxy_humidity(X):
    """Return a copy of ``X`` with a derived ``proxy_humidity`` column.

    The new column is ``average_rain_fall_mm_per_year / (avg_temp + 1)``.

    Parameters
    ----------
    X : pandas.DataFrame or array-like
        Input data. Anything that is not a DataFrame is wrapped with
        ``pd.DataFrame(X)``; the result must expose columns labelled
        ``'average_rain_fall_mm_per_year'`` and ``'avg_temp'``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the original columns plus ``proxy_humidity``.
        The caller's object is left untouched.
    """
    # Work on a copy so the derived column never leaks into the caller's frame.
    frame = X.copy() if isinstance(X, pd.DataFrame) else pd.DataFrame(X)
    # NOTE(review): avg_temp == -1 makes the denominator zero and yields
    # inf/NaN; the formula is left as-is because a fitted model may depend on it.
    frame["proxy_humidity"] = (
        frame["average_rain_fall_mm_per_year"] / (frame["avg_temp"] + 1)
    )
    return frame
55
+
56
# Rainfall/temperature ratio feature, standardised.
proxy_humidity_transformer = FunctionTransformer(func=proxy_humidity)
proxy_humidity_pipeline = make_pipeline(proxy_humidity_transformer, StandardScaler())

# Element-wise square, standardised.
square_transformer = FunctionTransformer(func=np.square)
square_pipeline = make_pipeline(square_transformer, StandardScaler())

# Element-wise log(1 + x), standardised.
log_transformer = FunctionTransformer(func=np.log1p)
log_pipeline = make_pipeline(log_transformer, StandardScaler())

# Plain standardisation for numeric columns that need no other treatment.
default_num_pipeline = make_pipeline(StandardScaler())
66
+
67
+ # Correlation Threshold Selector Class
68
class CorrelationThresholdSelector(BaseEstimator, TransformerMixin):
    """Feature selector that removes low-variance and mutually correlated columns.

    During ``fit`` the features are scanned in column order. Each not-yet-visited
    feature forms a cluster with every feature whose absolute correlation to it
    is strictly greater than ``threshold``. From each multi-member cluster only
    the member with the highest absolute correlation to the target is kept
    (ties broken by variance); the rest are dropped.

    Parameters
    ----------
    threshold : float, default=0.9
        Absolute feature-to-feature correlation above which two features are
        put in the same cluster.
    target_threshold : float, default=0.0
        When positive, a whole cluster is dropped if even its best member's
        absolute correlation with the target is below this value.
    method : str, default="pearson"
        Correlation method forwarded to ``DataFrame.corr`` / ``corrwith``.
    min_variance : float, default=0.0
        Features whose variance is less than or equal to this value are dropped.

    Attributes
    ----------
    n_features_in_ : int
        Number of columns seen during ``fit``.
    feature_names_in_ : ndarray
        Column names of ``X`` when it has ``.columns``, otherwise ``f0, f1, ...``.
    features_to_drop_, selected_features_ : ndarray of int
        Sorted column indices that are removed / retained.
    dropped_feature_names_, selected_feature_names_ : list
        Names matching the two index arrays above.
    """

    def __init__(self, threshold=0.9, target_threshold=0.0, method="pearson", min_variance=0.0):
        self.threshold = threshold
        self.target_threshold = target_threshold
        self.method = method
        self.min_variance = min_variance

    def _store_selection(self, drops):
        """Record dropped/retained indices and their names on the fitted estimator."""
        self.features_to_drop_ = np.array(sorted(drops), dtype=int)
        retained = sorted(set(range(self.n_features_in_)) - set(drops))
        self.selected_features_ = np.array(retained, dtype=int)
        self.selected_feature_names_ = self.feature_names_in_[self.selected_features_].tolist()
        self.dropped_feature_names_ = self.feature_names_in_[self.features_to_drop_].tolist()

    def fit(self, X, y):
        """Decide which columns of ``X`` to keep, using correlations with each other and ``y``.

        Returns ``self``. Input is validated with ``check_X_y`` as dense float64.
        """
        X_arr, y_arr = check_X_y(X, y, accept_sparse=False, dtype=np.float64)
        n_features = X_arr.shape[1]
        self.n_features_in_ = n_features

        if hasattr(X, "columns"):
            self.feature_names_in_ = np.asarray(X.columns)
        else:
            self.feature_names_in_ = np.array([f"f{i}" for i in range(n_features)])

        if n_features <= 1:
            # Nothing to compare against: keep everything, but still populate
            # every fitted attribute so both fit paths expose the same state.
            self._store_selection(set())
            return self

        X_df = pd.DataFrame(X_arr, columns=self.feature_names_in_)
        variances = X_df.var(numeric_only=True)
        feature_var = variances.to_numpy()
        low_var_mask = (variances <= self.min_variance).to_numpy()
        low_var_idx = set(np.flatnonzero(low_var_mask).tolist())

        # Explicit copy: fill_diagonal writes in place and must not target a
        # (possibly read-only) buffer owned by pandas.
        corr_mat = X_df.corr(method=self.method).abs().to_numpy(copy=True)
        np.fill_diagonal(corr_mat, 0.0)  # a feature is never "correlated" with itself

        y_series = pd.Series(y_arr)
        target_corr = (
            X_df.corrwith(y_series, method=self.method).abs().fillna(0.0).to_numpy()
        )

        visited = set()
        drops = set()

        for i in range(n_features):
            if i in visited or i in low_var_idx:
                continue

            correlated_idx = set(np.where(corr_mat[i] > self.threshold)[0].tolist())
            cluster = {i} | correlated_idx
            visited |= cluster

            if len(cluster) == 1:
                continue

            # Keep the member most correlated with the target; variance breaks ties.
            best = max(cluster, key=lambda idx: (target_corr[idx], feature_var[idx]))

            if self.target_threshold > 0 and target_corr[best] < self.target_threshold:
                # Even the best member is too weakly related to the target.
                drops |= cluster
            else:
                cluster.remove(best)
                drops |= cluster

        drops |= low_var_idx
        self._store_selection(drops)
        return self

    def transform(self, X):
        """Return only the retained columns of ``X`` as a float64 array."""
        check_is_fitted(self, "selected_features_")
        X_arr = check_array(X, accept_sparse=False, dtype=np.float64)

        if self.selected_features_.size == 0:
            return np.empty((X_arr.shape[0], 0), dtype=X_arr.dtype)

        sel = np.asarray(self.selected_features_, dtype=int)
        return X_arr[:, sel]

    def inverse_transform(self, X):
        """Scatter reduced data back to the fitted width, zero-filling dropped columns."""
        check_is_fitted(self, "selected_features_")
        X_arr = check_array(X, accept_sparse=False, dtype=np.float64)

        n_samples = X_arr.shape[0]
        full = np.zeros((n_samples, self.n_features_in_), dtype=X_arr.dtype)
        full[:, self.selected_features_] = X_arr
        return full

    def get_support(self, indices=False):
        """Return a boolean mask of retained features, or their indices if ``indices``."""
        check_is_fitted(self, "selected_features_")
        mask = np.zeros(self.n_features_in_, dtype=bool)
        mask[self.selected_features_] = True
        return np.where(mask)[0] if indices else mask

    def get_feature_names_out(self, input_features=None):
        """Return the names of the retained features.

        ``input_features`` defaults to the names recorded during ``fit``.
        Raises ``ValueError`` when its length differs from ``n_features_in_``.
        """
        check_is_fitted(self, "selected_features_")
        if input_features is None:
            input_features = self.feature_names_in_
        input_features = np.asarray(input_features)
        if len(input_features) != self.n_features_in_:
            raise ValueError("input_features length mismatch")
        return input_features[self.selected_features_]
167
+
168
+ # ======== FIXED MODEL LOADING ========
169
+
170
def load_model_properly(model_path='CropYieldPredictor.pkl'):
    """Load the trained model from ``model_path``, reporting progress in the Streamlit UI.

    Three strategies are tried in order: plain ``pickle.load``, ``joblib.load``,
    and ``pickle.load`` with ``encoding='latin1'``. The first one that succeeds
    wins; each failure is shown to the user instead of being silently ignored.

    Parameters
    ----------
    model_path : str, default='CropYieldPredictor.pkl'
        Path of the serialized model file.

    Returns
    -------
    object or None
        The deserialized model, or ``None`` when the file is missing or every
        loading strategy failed.
    """
    if not os.path.exists(model_path):
        st.error(f"❌ Model file '{model_path}' not found in current directory!")
        st.error(f"Please make sure '{model_path}' is in the same folder as this script.")
        return None

    # SECURITY: unpickling can execute arbitrary code. Only load model files
    # that come from a trusted source.
    try:
        with open(model_path, 'rb') as file:
            model = pickle.load(file)
    except Exception as e:
        st.error(f"❌ Error loading trained model: {str(e)}")
    else:
        st.success("✅ Trained model loaded successfully!")
        return model

    # Fallback 1: the file may have been written with joblib.
    try:
        import joblib
        model = joblib.load(model_path)
    except Exception as joblib_error:
        # Best-effort fallback: report the reason and move on to the next one.
        st.warning(f"joblib fallback failed: {joblib_error}")
    else:
        st.success("✅ Model loaded with joblib!")
        return model

    # Fallback 2: retry pickle with the latin1 text encoding.
    try:
        with open(model_path, 'rb') as file:
            model = pickle.load(file, encoding='latin1')
    except Exception as e2:
        st.error(f"❌ All loading methods failed: {str(e2)}")
        return None

    st.success("✅ Model loaded with latin1 encoding!")
    return model
205
+
206
+ # ======== STREAMLIT APP CODE ========
207
+
208
+ # Page configuration
209
st.set_page_config(layout="wide", page_icon="🌾", page_title="Crop Yield Predictor")

# Inject the page-wide stylesheet used by the header, result, feature and
# error containers.
st.markdown(
    """
<style>
.main-header {
font-size: 2.5rem;
color: #2e8b57;
text-align: center;
margin-bottom: 2rem;
}
.prediction-result {
background-color: #f0f8f0;
padding: 20px;
border-radius: 10px;
border-left: 5px solid #2e8b57;
margin: 20px 0;
}
.feature-box {
background-color: #f9f9f9;
padding: 15px;
border-radius: 8px;
margin: 10px 0;
}
.error-box {
background-color: #ffe6e6;
padding: 15px;
border-radius: 8px;
border-left: 5px solid #ff4444;
margin: 10px 0;
}
</style>
""",
    unsafe_allow_html=True,
)
246
+
247
+ # Load the actual trained model
248
@st.cache_resource
def load_model():
    """Return the trained model via ``load_model_properly``.

    Decorated with ``st.cache_resource`` so Streamlit caches the returned object.
    """
    trained_model = load_model_properly()
    return trained_model
251
+
252
+ # Available areas
253
# Countries/areas offered in the selector, in alphabetical order
# (one row per initial letter).
AVAILABLE_AREAS = [
    "Albania", "Algeria", "Angola", "Argentina", "Armenia", "Australia", "Austria", "Azerbaijan",
    "Bahamas", "Bahrain", "Bangladesh", "Belarus", "Belgium", "Botswana", "Brazil", "Bulgaria",
    "Burkina Faso", "Burundi",
    "Cameroon", "Canada", "Central African Republic", "Chile", "Colombia", "Croatia",
    "Denmark", "Dominican Republic",
    "Ecuador", "Egypt", "El Salvador", "Eritrea", "Estonia",
    "Finland", "France",
    "Germany", "Ghana", "Greece", "Guatemala", "Guinea", "Guyana",
    "Haiti", "Honduras", "Hungary",
    "India", "Indonesia", "Iraq", "Ireland", "Italy",
    "Jamaica", "Japan",
    "Kazakhstan", "Kenya",
    "Latvia", "Lebanon", "Lesotho", "Libya", "Lithuania",
    "Madagascar", "Malawi", "Malaysia", "Mali", "Mauritania", "Mauritius", "Mexico",
    "Montenegro", "Morocco", "Mozambique",
    "Namibia", "Nepal", "Netherlands", "New Zealand", "Nicaragua", "Niger", "Norway",
    "Pakistan", "Papua New Guinea", "Peru", "Poland", "Portugal",
    "Qatar",
    "Romania", "Rwanda",
    "Saudi Arabia", "Senegal", "Slovenia", "South Africa", "Spain", "Sri Lanka", "Sudan",
    "Suriname", "Sweden", "Switzerland",
    "Tajikistan", "Thailand", "Tunisia", "Turkey",
    "Uganda", "Ukraine", "United Kingdom", "Uruguay",
    "Zambia", "Zimbabwe",
]
271
+
272
+ # Main app
273
def _parse_finite_float(raw):
    """Convert free-text form input to a finite float; raise ValueError otherwise."""
    value = float(raw)
    if not np.isfinite(value):
        # float() happily accepts "nan" and "inf", which are not usable inputs.
        raise ValueError(f"non-finite value: {raw!r}")
    return value


def main():
    """Render the Crop Yield Predictor page.

    Loads the trained model, shows the input form, runs ``model.predict`` on
    the submitted values and displays the result, then renders the sidebar
    with model information and the page footer. Stops the script early when
    the model cannot be loaded.
    """
    st.markdown('<h1 class="main-header">🌾 Crop Yield Predictor | Build BY M Hamza Shahid</h1>', unsafe_allow_html=True)

    # Load model
    model = load_model()

    if model is None:
        # load_model is wrapped in st.cache_resource, so a failed load would
        # otherwise stay cached as None even after the model file is fixed.
        load_model.clear()

        st.markdown('<div class="error-box">', unsafe_allow_html=True)
        st.error(
            "\n"
            "**Cannot load the trained model. Please check:**\n"
            "1. 'CropYieldPredictor.pkl' exists in the current directory\n"
            "2. The file is not corrupted\n"
            "3. You're using compatible Python/scikit-learn versions\n"
        )
        st.markdown('</div>', unsafe_allow_html=True)

        # Show current directory files to help diagnose a missing model file
        st.write("**Files in current directory:**")
        current_files = [f for f in os.listdir('.') if os.path.isfile(f)]
        st.write(current_files)
        st.stop()

    # Create two columns for layout
    col1, col2 = st.columns([1, 1])

    with col1:
        st.subheader("📊 Input Parameters")

        with st.form("prediction_form"):
            st.markdown('<div class="feature-box">', unsafe_allow_html=True)

            # Dropdown for areas; free text for the crop and the numeric fields
            area = st.selectbox("🌍 Country/Area", AVAILABLE_AREAS, index=AVAILABLE_AREAS.index('India'))
            item = st.text_input("🌱 Crop Type", "Maize")
            year = st.number_input("📅 Year", min_value=1960, max_value=2030, value=2023)
            rainfall = st.text_input("💧 Average Rainfall (mm/year)", "800.0")
            pesticides = st.text_input("🧴 Pesticides (tonnes)", "5000.0")
            temperature = st.text_input("🌡️ Average Temperature (°C)", "20.0")

            st.markdown('</div>', unsafe_allow_html=True)

            submitted = st.form_submit_button("🚀 Predict Yield", use_container_width=True)

    with col2:
        st.subheader("📈 Prediction Results")

        if submitted:
            # Parse the numeric fields on their own so that only genuine input
            # errors produce the "enter valid numeric values" message.
            try:
                rainfall_val = _parse_finite_float(rainfall)
                pesticides_val = _parse_finite_float(pesticides)
                temperature_val = _parse_finite_float(temperature)
            except ValueError:
                st.error("❌ Please enter valid numeric values for Rainfall, Pesticides, and Temperature")
            else:
                # Single-row frame using the column names the app feeds to the model
                input_df = pd.DataFrame({
                    'Area': [area],
                    'Item': [item.strip()],
                    'Year': [year],
                    'average_rain_fall_mm_per_year': [rainfall_val],
                    'pesticides_tonnes': [pesticides_val],
                    'avg_temp': [temperature_val],
                })

                st.write("**Input Data:**")
                st.dataframe(input_df, use_container_width=True)

                try:
                    with st.spinner("🤖 Making prediction with trained model..."):
                        prediction = model.predict(input_df)
                    # ravel() copes with both 1-D and (n, 1)-shaped predictions
                    predicted_yield = float(np.ravel(prediction)[0])

                    # Convert hg/ha to kg/ha
                    predicted_yield_kg_ha = predicted_yield * 0.1

                    st.markdown('<div class="prediction-result">', unsafe_allow_html=True)
                    st.metric("Predicted Yield", f"{predicted_yield_kg_ha:,.0f} kg/ha",
                              delta=f"{predicted_yield:,.0f} hg/ha")

                    # Interpretation
                    if predicted_yield_kg_ha < 2000:
                        st.warning("📉 Below average yield predicted. Consider optimizing farming practices.")
                    elif predicted_yield_kg_ha > 5000:
                        st.success("📈 Excellent yield predicted! Optimal conditions detected.")
                    else:
                        st.info("📊 Good yield predicted within normal range.")

                    st.markdown('</div>', unsafe_allow_html=True)
                except Exception as e:
                    # Model-side failures (including ValueError raised by the
                    # estimator) are reported as prediction errors, not input errors.
                    st.error(f"❌ Prediction failed: {str(e)}")
                    st.info("This might be a feature name mismatch. Check if your trained model expects the exact same feature names.")

                    with st.expander("🔧 Debug Information"):
                        st.write("Model type:", type(model))
                        if hasattr(model, 'feature_names_in_'):
                            st.write("Expected features:", model.feature_names_in_)
                        st.write("Input features:", list(input_df.columns))

    # Model information in sidebar
    with st.sidebar:
        st.subheader("ℹ️ Model Information")
        st.write(f"**Model Type:** {type(model).__name__}")

        # Pipelines expose their steps; list them when available
        if hasattr(model, 'steps'):
            st.write("**Pipeline Steps:**")
            for step_name, step in model.steps:
                st.write(f"- {step_name}: {type(step).__name__}")

        st.write("**Features Used:**")
        st.write("- Area (Country/Region)")
        st.write("- Item (Crop Type)")
        st.write("- Year")
        st.write("- average_rain_fall_mm_per_year")
        st.write("- pesticides_tonnes")
        st.write("- avg_temp")

        st.subheader("🔧 Model Status")
        st.success("✅ Trained model loaded and ready!")

        # File info
        model_path = 'CropYieldPredictor.pkl'
        if os.path.exists(model_path):
            file_size = os.path.getsize(model_path) / 1024 / 1024
            st.write(f"**Model file size:** {file_size:.2f} MB")

    # Footer
    st.markdown("---")
    st.markdown(
        "\n"
        "<div style='text-align: center; color: #666;'>\n"
        "<p>Built with ❤️ using Streamlit | Build BY M Hamza Shahid | This project is build for Uraan AI Techathton 1.0</p>\n"
        "</div>\n",
        unsafe_allow_html=True,
    )


if __name__ == "__main__":
    main()