# multi_species_pipeline.py
import os
import json
import pickle
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
# -------- CONFIG --------
# Species id -> per-species migration timeseries CSV. Every file follows the
# same "migration_timeseries_<species>.csv" naming pattern.
SPECIES_FILES = {
    species: f"migration_timeseries_{species}.csv"
    for species in ("mackerel", "sardinella", "scomber", "skipjack", "tuna")
}

# NOTE: this is ONLY a training hyperparameter (not exposed to the frontend).
# It is the number of consecutive timesteps fed to the LSTM per sample.
SEQUENCE_LENGTH = 3
def _build_windows(series, window):
    """Slice *series* into overlapping input windows and next-step targets.

    Returns ``(inputs, targets)`` where ``inputs[k]`` is
    ``series[k:k + window]`` and ``targets[k]`` is the row right after that
    window, ``series[k + window]``.
    """
    inputs = np.array(
        [series[end - window:end] for end in range(window, len(series))]
    )
    targets = np.array(series[window:])
    return inputs, targets


def train_for_species(species_id: str, ts_csv: str):
    """Train and persist a next-position LSTM for one species.

    Reads the timeseries CSV, orders it by year and month, min-max scales the
    latitude/longitude columns, trains an LSTM that predicts the next
    coordinate pair from the previous ``SEQUENCE_LENGTH`` pairs, and writes
    the model, the fitted scaler and a metadata JSON under
    ``models/<species_id>/``.

    Input problems (missing file, missing columns, too few rows) are reported
    on stdout and the species is skipped; no exception is raised for them.

    Args:
        species_id: Name used for the output directory and file prefixes.
        ts_csv: Path to a CSV with ``year``, ``month``, ``decimalLatitude``
            and ``decimalLongitude`` columns.

    Returns:
        None.
    """
    if not os.path.exists(ts_csv):
        print(f"[WARN] Timeseries CSV not found for {species_id}: {ts_csv}")
        return

    print(f"\n=== Training LSTM for {species_id} from {ts_csv} ===")
    df = pd.read_csv(ts_csv)

    # Validate the schema BEFORE sorting: sort_values raises KeyError when
    # "year"/"month" are absent, which would bypass the message below.
    required = {"year", "month", "decimalLatitude", "decimalLongitude"}
    missing = required - set(df.columns)
    if missing:
        print(f"[ERROR] Missing columns {missing} in {ts_csv}")
        return

    # At least one full window plus one target row is needed. Checking here
    # (rather than after scaling) also avoids fitting the scaler on an empty
    # frame, which raises.
    if len(df) <= SEQUENCE_LENGTH:
        print(f"[ERROR] Not enough data to train for {species_id}")
        return

    df = df.sort_values(["year", "month"]).reset_index(drop=True)

    coords = df[["decimalLatitude", "decimalLongitude"]].values
    scaler = MinMaxScaler()
    coords_scaled = scaler.fit_transform(coords)

    X, y = _build_windows(coords_scaled, SEQUENCE_LENGTH)

    model = Sequential()
    model.add(LSTM(64, activation="tanh", input_shape=(SEQUENCE_LENGTH, 2)))
    model.add(Dense(32, activation="relu"))
    model.add(Dense(2))  # one output each for scaled latitude and longitude
    model.compile(optimizer="adam", loss="mse")
    model.fit(X, y, epochs=50, batch_size=8, verbose=1)

    out_dir = os.path.join("models", species_id)
    os.makedirs(out_dir, exist_ok=True)

    # Species-specific filenames
    model_path = os.path.join(out_dir, f"{species_id}_model.h5")
    scaler_path = os.path.join(out_dir, f"{species_id}_scaler.pkl")
    meta_path = os.path.join(out_dir, f"{species_id}_metadata.json")

    model.save(model_path)
    with open(scaler_path, "wb") as f:
        pickle.dump(scaler, f)

    # Store everything the backend needs (no frontend involvement)
    metadata = {
        "species": species_id,
        "sequence_length": SEQUENCE_LENGTH,  # internal
        "last_year": int(df["year"].iloc[-1]),
        "last_month": int(df["month"].iloc[-1]),
        # Most recent scaled window; written out alongside the last date.
        "last_sequence": coords_scaled[-SEQUENCE_LENGTH:].tolist(),  # internal
    }
    with open(meta_path, "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=2)

    print(f"[OK] Saved {model_path}, {scaler_path}, {meta_path}")
def main():
    """Train one model per configured species, in configuration order."""
    # Make sure the shared output root exists before any species is trained.
    os.makedirs("models", exist_ok=True)
    for name in SPECIES_FILES:
        train_for_species(name, SPECIES_FILES[name])
# Run the training pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|