Spaces:
Sleeping
Sleeping
| # model.py | |
| import joblib | |
| import pandas as pd | |
| import numpy as np | |
| from sklearn.ensemble import GradientBoostingRegressor | |
| from sklearn.multioutput import MultiOutputRegressor | |
| from sklearn.model_selection import train_test_split | |
# File that train_model() writes the fitted model to via joblib.dump().
MODEL_PATH = "geo_metals_model.joblib"
def _find_coordinate_column(columns, exact_names, fragment):
    """Return the header that holds a coordinate, or None if nothing matches.

    A header equal to one of *exact_names* wins. Only when there is no exact
    match does the first header containing *fragment* get used, so that an
    unrelated header which merely contains the fragment (e.g. "platinum"
    contains "lat") cannot shadow a real "latitude" column.
    """
    for name in columns:
        if name in exact_names:
            return name
    return next((name for name in columns if fragment in name), None)


def train_model():
    """Fit the coordinate -> metal concentration model and persist it.

    Reads ``baseline_heavy_metals_multi.csv``, normalizes its headers, fits one
    gradient-boosting regressor per metal on an 80% training split of the
    rows, writes the fitted model to ``MODEL_PATH`` and returns it.

    Returns:
        The fitted ``MultiOutputRegressor``; its outputs are ordered
        Fe, Cr, Mn, Mo, In, Ta.

    Raises:
        KeyError: If no latitude/longitude column can be identified, or if
            any of the six metal columns is missing from the CSV.
    """
    df = pd.read_csv("baseline_heavy_metals_multi.csv")

    # Print raw columns for debug
    print("Raw columns:", df.columns.tolist())

    # Standardize column names
    df.columns = df.columns.str.strip().str.lower()

    lat_col = _find_coordinate_column(
        df.columns, ("lat", "latitude"), "lat"
    )
    lon_col = _find_coordinate_column(
        df.columns, ("lon", "long", "lng", "longitude"), "lon"
    )
    if lat_col is None or lon_col is None:
        raise KeyError("Latitude and/or Longitude columns not found. Please check your column names.")

    target_names = ["Fe_ppm", "Cr_ppm", "Mn_ppm", "Mo_ppm", "In_ppm", "Ta_ppm"]

    renames = {lat_col: "Latitude", lon_col: "Longitude"}
    missing = []
    for target in target_names:
        symbol = target.split("_")[0].lower()
        # Headers were lower-cased above, so a CSV that already uses the
        # canonical "Fe_ppm" spelling now reads "fe_ppm". Accept both forms;
        # the bare symbol is tried first to keep the previous behaviour.
        source = next(
            (name for name in (symbol, target.lower()) if name in df.columns),
            None,
        )
        if source is None:
            missing.append(target)
        else:
            renames[source] = target
    if missing:
        raise KeyError(f"Metal concentration columns not found for: {missing}")

    # Rename consistently
    df = df.rename(columns=renames)

    features = df[["Latitude", "Longitude"]]
    targets = df[target_names]

    # The held-out 20% is not used here; the split is kept so the model is
    # trained on exactly the same rows as before.
    X_train, _, y_train, _ = train_test_split(
        features, targets, test_size=0.2, random_state=42
    )

    # Seed the regressor as well so repeated trainings give the same model.
    model = MultiOutputRegressor(GradientBoostingRegressor(random_state=42))
    model.fit(X_train, y_train)

    joblib.dump(model, MODEL_PATH)
    return model
# Train at import time so that predict_metals() has a fitted model to call.
# NOTE(review): train_model() dumps the model to MODEL_PATH, but nothing in the
# visible part of this file loads it back, so every import re-reads the CSV
# and retrains. Confirm whether that is intended.
model = train_model()
def predict_metals(lat, lon):
    """Predict metal concentrations for a single coordinate.

    Args:
        lat: Latitude of the query point.
        lon: Longitude of the query point.

    Returns:
        A dict keyed by 'Fe_ppm', 'Cr_ppm', 'Mn_ppm', 'Mo_ppm', 'In_ppm' and
        'Ta_ppm', holding the values the module-level ``model`` predicts for
        that point.
    """
    labels = ('Fe_ppm', 'Cr_ppm', 'Mn_ppm', 'Mo_ppm', 'In_ppm', 'Ta_ppm')

    # One-row frame using the same feature column names as training.
    query = pd.DataFrame({'Latitude': [lat], 'Longitude': [lon]})

    # predict() returns one row per input row; there is exactly one here.
    row = model.predict(query)[0]
    return {label: value for label, value in zip(labels, row)}