Spaces:

chinmay0805
/

Fish-Migration-Pattern

Sleeping

App Files Files Community

Fish-Migration-Pattern / train.py

chinmay0805

Upload 5 files

45e0498 verified 6 months ago

raw

history blame contribute delete

3.2 kB

	# multi_species_pipeline.py

	import os
	import json
	import pickle
	import numpy as np
	import pandas as pd

	from sklearn.preprocessing import MinMaxScaler
	from tensorflow.keras.models import Sequential
	from tensorflow.keras.layers import LSTM, Dense

	# -------- CONFIG --------

	# Species id -> time series CSV used to train that species' model.
	# All inputs share the "migration_timeseries_<species>.csv" naming scheme,
	# so the mapping is derived from the list of species ids.
	SPECIES_FILES = {
	    species: f"migration_timeseries_{species}.csv"
	    for species in ("mackerel", "sardinella", "scomber", "skipjack", "tuna")
	}

	# Number of consecutive time steps fed to the LSTM for each training sample.
	# Training-only hyperparameter: it is not exposed to the frontend.
	SEQUENCE_LENGTH = 3


	def _build_windows(series, window):
	    """Split *series* into sliding input windows and next-step targets.

	    Returns ``(X, y)`` where ``X[k]`` is ``series[k:k + window]`` and ``y[k]``
	    is the row immediately following that window.  The caller must ensure
	    ``len(series) > window``; otherwise there is nothing to stack.
	    """
	    starts = range(len(series) - window)
	    X = np.stack([series[s:s + window] for s in starts])
	    y = np.asarray(series[window:])
	    return X, y


	def train_for_species(species_id: str, ts_csv: str) -> None:
	    """Train and persist a next-position LSTM for a single species.

	    Reads the time series CSV, orders it by year and month, min-max scales
	    the latitude/longitude columns and trains an LSTM that predicts the next
	    coordinate pair from the previous ``SEQUENCE_LENGTH`` pairs.  Three
	    artifacts are written under ``models/<species_id>/``: the Keras model
	    (``.h5``), the pickled scaler and a JSON metadata file.

	    Input problems (file not found, empty file, missing columns, too few
	    rows) are reported on stdout and the function returns without training,
	    so one bad species does not abort a multi-species run.

	    Args:
	        species_id: Identifier used in log messages, the output directory
	            name and the artifact filenames.
	        ts_csv: Path to a CSV containing the columns ``year``, ``month``,
	            ``decimalLatitude`` and ``decimalLongitude``.
	    """
	    if not os.path.exists(ts_csv):
	        print(f"[WARN] Timeseries CSV not found for {species_id}: {ts_csv}")
	        return

	    print(f"\n=== Training LSTM for {species_id} from {ts_csv} ===")

	    try:
	        df = pd.read_csv(ts_csv)
	    except pd.errors.EmptyDataError:
	        # A zero-byte file has no header to parse; report it like the other
	        # input problems instead of letting the exception stop the run.
	        print(f"[ERROR] Timeseries CSV is empty for {species_id}: {ts_csv}")
	        return

	    # Validate the schema BEFORE using any column: sorting first would raise
	    # KeyError on a missing "year"/"month" and this report would never print.
	    required = {"year", "month", "decimalLatitude", "decimalLongitude"}
	    missing = required - set(df.columns)
	    if missing:
	        print(f"[ERROR] Missing columns {missing} in {ts_csv}")
	        return

	    df = df.sort_values(["year", "month"]).reset_index(drop=True)

	    # At least one full window plus one target row is needed.  This is
	    # checked before scaling because the scaler cannot be fitted on a
	    # frame with zero rows.
	    if len(df) <= SEQUENCE_LENGTH:
	        print(f"[ERROR] Not enough data to train for {species_id}")
	        return

	    coords = df[["decimalLatitude", "decimalLongitude"]].values

	    scaler = MinMaxScaler()
	    coords_scaled = scaler.fit_transform(coords)

	    X, y = _build_windows(coords_scaled, SEQUENCE_LENGTH)

	    model = Sequential()
	    model.add(LSTM(64, activation="tanh", input_shape=(SEQUENCE_LENGTH, 2)))
	    model.add(Dense(32, activation="relu"))
	    model.add(Dense(2))  # one output per coordinate (latitude, longitude)
	    model.compile(optimizer="adam", loss="mse")

	    model.fit(X, y, epochs=50, batch_size=8, verbose=1)

	    out_dir = os.path.join("models", species_id)
	    os.makedirs(out_dir, exist_ok=True)

	    # Species-specific filenames
	    model_path = os.path.join(out_dir, f"{species_id}_model.h5")
	    scaler_path = os.path.join(out_dir, f"{species_id}_scaler.pkl")
	    meta_path = os.path.join(out_dir, f"{species_id}_metadata.json")

	    model.save(model_path)

	    with open(scaler_path, "wb") as f:
	        pickle.dump(scaler, f)

	    # Store everything the backend needs (no frontend involvement).  The
	    # last scaled window is saved alongside the date of the final row.
	    metadata = {
	        "species": species_id,
	        "sequence_length": SEQUENCE_LENGTH,  # internal
	        "last_year": int(df["year"].iloc[-1]),
	        "last_month": int(df["month"].iloc[-1]),
	        "last_sequence": coords_scaled[-SEQUENCE_LENGTH:].tolist(),  # internal
	    }

	    with open(meta_path, "w", encoding="utf-8") as f:
	        json.dump(metadata, f, indent=2)

	    print(f"[OK] Saved {model_path}, {scaler_path}, {meta_path}")


	def main():
	    """Train one model per species listed in ``SPECIES_FILES``.

	    Ensures the top-level ``models`` directory exists, then runs
	    ``train_for_species`` for each configured species in mapping order.
	    """
	    os.makedirs("models", exist_ok=True)
	    for species_id in SPECIES_FILES:
	        train_for_species(species_id, SPECIES_FILES[species_id])


	# Run the full training pipeline only when executed as a script.
	if __name__ == "__main__":
	    main()