Spaces:
Sleeping
Sleeping
| # multi_species_pipeline.py | |
| import os | |
| import json | |
| import pickle | |
| import numpy as np | |
| import pandas as pd | |
| from sklearn.preprocessing import MinMaxScaler | |
| from tensorflow.keras.models import Sequential | |
| from tensorflow.keras.layers import LSTM, Dense | |
# -------- CONFIG --------
# Maps each species identifier to the CSV file holding its migration
# time series. Iteration order of this dict is the training order.
SPECIES_FILES = {
    "mackerel": "migration_timeseries_mackerel.csv",
    "sardinella": "migration_timeseries_sardinella.csv",
    "scomber": "migration_timeseries_scomber.csv",
    "skipjack": "migration_timeseries_skipjack.csv",
    "tuna": "migration_timeseries_tuna.csv",
}

# NOTE: this is ONLY a training hyperparameter (not exposed to frontend).
SEQUENCE_LENGTH = 3
def train_for_species(species_id: str, ts_csv: str):
    """Train and persist an LSTM next-position model for one species.

    Reads the time-series CSV, orders it by year and month, min-max scales
    the latitude/longitude pairs, and builds sliding windows of
    ``SEQUENCE_LENGTH`` consecutive positions as inputs with the following
    position as the target. It then fits a small LSTM and writes the model,
    the fitted scaler and a JSON metadata file under ``models/<species_id>/``.

    Input problems (missing file, missing columns, too few rows) are
    reported on stdout and the function returns without training.

    Args:
        species_id: Identifier used for the output directory and file names.
        ts_csv: Path to a CSV with ``year``, ``month``, ``decimalLatitude``
            and ``decimalLongitude`` columns.

    Returns:
        None.
    """
    if not os.path.exists(ts_csv):
        print(f"[WARN] Timeseries CSV not found for {species_id}: {ts_csv}")
        return

    print(f"\n=== Training LSTM for {species_id} from {ts_csv} ===")
    df = pd.read_csv(ts_csv)

    # Validate the schema BEFORE touching any column: sorting first would
    # raise KeyError on a missing "year"/"month" and bypass this message.
    required = {"year", "month", "decimalLatitude", "decimalLongitude"}
    missing = required - set(df.columns)
    if missing:
        print(f"[ERROR] Missing columns {missing} in {ts_csv}")
        return

    # One (window, target) pair needs SEQUENCE_LENGTH + 1 rows. Checking here
    # also keeps a zero-row frame away from the scaler, which rejects it.
    if len(df) <= SEQUENCE_LENGTH:
        print(f"[ERROR] Not enough data to train for {species_id}")
        return

    df = df.sort_values(["year", "month"]).reset_index(drop=True)

    coords = df[["decimalLatitude", "decimalLongitude"]].values
    scaler = MinMaxScaler()
    coords_scaled = scaler.fit_transform(coords)

    # Window j covers rows [j, j + SEQUENCE_LENGTH); its target is the row
    # immediately after, i.e. coords_scaled[j + SEQUENCE_LENGTH].
    n_samples = len(coords_scaled) - SEQUENCE_LENGTH
    X = np.array(
        [coords_scaled[j:j + SEQUENCE_LENGTH] for j in range(n_samples)]
    )
    y = np.array(coords_scaled[SEQUENCE_LENGTH:])

    model = Sequential()
    model.add(LSTM(64, activation="tanh", input_shape=(SEQUENCE_LENGTH, 2)))
    model.add(Dense(32, activation="relu"))
    model.add(Dense(2))  # two outputs: scaled latitude and longitude
    model.compile(optimizer="adam", loss="mse")
    model.fit(X, y, epochs=50, batch_size=8, verbose=1)

    out_dir = os.path.join("models", species_id)
    os.makedirs(out_dir, exist_ok=True)

    # Species-specific filenames
    model_path = os.path.join(out_dir, f"{species_id}_model.h5")
    scaler_path = os.path.join(out_dir, f"{species_id}_scaler.pkl")
    meta_path = os.path.join(out_dir, f"{species_id}_metadata.json")

    model.save(model_path)
    with open(scaler_path, "wb") as f:
        pickle.dump(scaler, f)

    # Store everything the backend needs (no frontend involvement)
    metadata = {
        "species": species_id,
        "sequence_length": SEQUENCE_LENGTH,  # internal
        "last_year": int(df["year"].iloc[-1]),
        "last_month": int(df["month"].iloc[-1]),
        # Most recent scaled window, saved alongside the model.
        "last_sequence": coords_scaled[-SEQUENCE_LENGTH:].tolist(),  # internal
    }
    with open(meta_path, "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=2)

    print(f"[OK] Saved {model_path}, {scaler_path}, {meta_path}")
def main():
    """Train one model per entry in ``SPECIES_FILES``.

    Ensures the top-level ``models`` directory exists, then calls
    ``train_for_species`` for each species identifier and its CSV path,
    in the dict's iteration order.
    """
    os.makedirs("models", exist_ok=True)
    for name in SPECIES_FILES:
        train_for_species(name, SPECIES_FILES[name])
# Run the training pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()