# train_model.py
"""Train a random-forest classifier on the water-quality dataset and save it.

Reads ``water_quality_dataset.csv`` from the current working directory, uses
the ``label`` column as the target and every other column as a feature, fits
a ``RandomForestClassifier`` on an 80 % training split, prints the accuracy
measured on the remaining 20 %, and writes the fitted model to
``model.joblib``.
"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import joblib
# NOTE(review): these names are not used in the visible part of this script;
# kept in case an upload step elsewhere relies on them.
from huggingface_hub import HfApi, HfFolder, Repository

# One seed shared by the split and the forest so repeated runs on the same
# data produce the same model.
RANDOM_STATE = 42

# 1. Load dataset
df = pd.read_csv("water_quality_dataset.csv")

# Features & labels: "label" is the target, all remaining columns are inputs.
X = df.drop(columns=["label"])
y = df["label"]

# 2. Split data (80 % train / 20 % held-out test)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# 3. Train model
model = RandomForestClassifier(random_state=RANDOM_STATE)
model.fit(X_train, y_train)

# 4. Evaluate on the held-out split so it is actually used; score() returns
#    the mean accuracy of the classifier on the given data.
accuracy = model.score(X_test, y_test)
print(f"Held-out accuracy: {accuracy:.4f}")

# 5. Save model
joblib.dump(model, "model.joblib")
print("✅ Model trained and saved as model.joblib")