# src/model_training.py
import os
import pickle

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from data_processing import load_data, preprocess_features


def train_and_save_model():
    """Train a random-forest classifier and pickle it with its label encoder.

    Steps:
      1. Load the dataset via ``load_data()`` and split it into features and
         target via ``preprocess_features()``.
      2. Encode the target labels with a ``LabelEncoder``.
      3. Hold out 20% of the rows as a test set (``random_state=42``).
      4. Fit a ``RandomForestClassifier`` with 100 estimators
         (``random_state=42``).
      5. Pickle ``{'model': ..., 'label_encoder': ...}`` to
         ``<this file's directory>/../models/crop_model.pkl``, creating the
         ``models`` directory first when it is missing.
      6. Print the save location and the accuracy on the held-out test set.

    The function takes no arguments and returns ``None``; its results are the
    pickle file on disk and the two lines printed to stdout.
    """
    # Load and preprocess dataset
    df = load_data()
    X, y = preprocess_features(df)

    # Encode target labels; the fitted encoder is saved alongside the model so
    # that integer predictions can be mapped back to the original labels.
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)

    # Split into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y_encoded, test_size=0.2, random_state=42
    )

    # Train Random Forest Classifier
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Save model and label encoder together
    model_path = os.path.join(
        os.path.dirname(__file__), '..', 'models', 'crop_model.pkl'
    )
    # The target directory may not exist yet (e.g. on a fresh checkout);
    # create it so that open() does not fail after training has finished.
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    # NOTE: the artifact is a pickle; it should only ever be loaded from a
    # trusted location, since unpickling can execute arbitrary code.
    with open(model_path, 'wb') as f:
        pickle.dump({'model': model, 'label_encoder': le}, f)

    # Print accuracy on test set
    accuracy = model.score(X_test, y_test)
    print(f"✅ Model trained and saved at {model_path}")
    print(f"Test Accuracy: {accuracy:.2f}")


if __name__ == "__main__":
    train_and_save_model()