# multi_species_pipeline.py
"""Train one LSTM per species on its migration time series and save the artifacts.

For every entry in ``SPECIES_FILES`` the script reads the CSV, scales the
latitude/longitude columns to [0, 1], trains a small LSTM that predicts the
next coordinate pair from the previous ``SEQUENCE_LENGTH`` pairs, and writes
the model, the fitted scaler and a JSON metadata file under
``models/<species_id>/``.
"""
import json
import os
import pickle

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential

# -------- CONFIG --------
SPECIES_FILES = {
    "mackerel": "migration_timeseries_mackerel.csv",
    "sardinella": "migration_timeseries_sardinella.csv",
    "scomber": "migration_timeseries_scomber.csv",
    "skipjack": "migration_timeseries_skipjack.csv",
    "tuna": "migration_timeseries_tuna.csv",
}

# This is ONLY a training hyperparameter (not exposed to frontend).
SEQUENCE_LENGTH = 3


def _make_windows(series, window):
    """Split *series* into sliding input windows and their next-step targets.

    Args:
        series: 2-D array of shape (n_steps, n_features).
        window: Number of consecutive steps that form one input sample.

    Returns:
        A tuple ``(X, y)`` where ``X[k]`` is ``series[k:k + window]`` and
        ``y[k]`` is ``series[k + window]``. The caller must ensure that
        ``len(series) > window``.
    """
    n_samples = len(series) - window
    X = np.array([series[start:start + window] for start in range(n_samples)])
    y = np.asarray(series[window:])
    return X, y


def _build_model(sequence_length):
    """Return a compiled LSTM mapping (sequence_length, 2) inputs to 2 outputs."""
    model = Sequential()
    model.add(LSTM(64, activation="tanh", input_shape=(sequence_length, 2)))
    model.add(Dense(32, activation="relu"))
    model.add(Dense(2))
    model.compile(optimizer="adam", loss="mse")
    return model


def train_for_species(species_id: str, ts_csv: str) -> None:
    """Train and persist the LSTM for a single species.

    Problems with the input (missing file, missing columns, too few rows) are
    reported on stdout and the species is skipped; nothing is raised for them.

    Args:
        species_id: Species key; used for the output directory and file names.
        ts_csv: Path to the CSV holding the species' time series. It must
            provide the columns ``year``, ``month``, ``decimalLatitude`` and
            ``decimalLongitude``.

    Returns:
        None. On success three files are written to ``models/<species_id>/``:
        ``<species_id>_model.h5``, ``<species_id>_scaler.pkl`` and
        ``<species_id>_metadata.json``.
    """
    if not os.path.exists(ts_csv):
        print(f"[WARN] Timeseries CSV not found for {species_id}: {ts_csv}")
        return

    print(f"\n=== Training LSTM for {species_id} from {ts_csv} ===")

    df = pd.read_csv(ts_csv)

    # Validate the schema BEFORE sorting: sort_values raises KeyError when
    # "year" or "month" is absent, which would bypass this error report.
    required = {"year", "month", "decimalLatitude", "decimalLongitude"}
    missing = required - set(df.columns)
    if missing:
        print(f"[ERROR] Missing columns {missing} in {ts_csv}")
        return

    df = df.sort_values(["year", "month"]).reset_index(drop=True)

    # Check the row count BEFORE scaling: the scaler rejects a zero-row array,
    # and at least SEQUENCE_LENGTH + 1 rows are needed for one training sample.
    if len(df) <= SEQUENCE_LENGTH:
        print(f"[ERROR] Not enough data to train for {species_id}")
        return

    coords = df[["decimalLatitude", "decimalLongitude"]].values

    scaler = MinMaxScaler()
    coords_scaled = scaler.fit_transform(coords)

    X, y = _make_windows(coords_scaled, SEQUENCE_LENGTH)

    model = _build_model(SEQUENCE_LENGTH)
    model.fit(X, y, epochs=50, batch_size=8, verbose=1)

    out_dir = os.path.join("models", species_id)
    os.makedirs(out_dir, exist_ok=True)

    # Species-specific filenames
    model_path = os.path.join(out_dir, f"{species_id}_model.h5")
    scaler_path = os.path.join(out_dir, f"{species_id}_scaler.pkl")
    meta_path = os.path.join(out_dir, f"{species_id}_metadata.json")

    model.save(model_path)

    with open(scaler_path, "wb") as f:
        pickle.dump(scaler, f)

    # Store everything backend needs (no frontend involvement)
    metadata = {
        "species": species_id,
        "sequence_length": SEQUENCE_LENGTH,  # internal
        "last_year": int(df["year"].iloc[-1]),
        "last_month": int(df["month"].iloc[-1]),
        # Most recent scaled window (internal).
        "last_sequence": coords_scaled[-SEQUENCE_LENGTH:].tolist(),
    }

    with open(meta_path, "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=2)

    print(f"[OK] Saved {model_path}, {scaler_path}, {meta_path}")


def main():
    """Train a model for every species listed in ``SPECIES_FILES``."""
    os.makedirs("models", exist_ok=True)
    for species_id, ts_csv in SPECIES_FILES.items():
        train_for_species(species_id, ts_csv)


if __name__ == "__main__":
    main()