Spaces:

chinmay0805
/

Fish-Migration-Pattern

Sleeping

App Files Files Community

chinmay0805 committed on Dec 8, 2025

Commit

45e0498

verified ·

1 Parent(s): 2b4d993

Upload 5 files

Browse files

Files changed (5) hide show

DockerFile +25 -0
main.py +182 -0
metadata.json +20 -0
requirements.txt +7 -0
train.py +103 -0

DockerFile ADDED Viewed

	@@ -0,0 +1,25 @@

+FROM python:3.11-slim
+# Avoid buffering logs so output reaches the container log stream immediately
+ENV PYTHONUNBUFFERED=1
+# Install system deps (compiler toolchain, optional but safe).
+# --no-install-recommends keeps unrelated recommended packages out of the image.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+# Hugging Face Docker Spaces run the container as UID 1000; create that user
+# up front so installed packages and copied files are owned by it.
+RUN useradd -m -u 1000 user
+USER user
+# pip installs into the user site when not root; put its scripts (uvicorn) on PATH
+ENV PATH="/home/user/.local/bin:$PATH"
+# Workdir inside container
+WORKDIR /app
+# Copy requirements first so the dependency layer is cached across code changes
+COPY --chown=user requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy all project files
+COPY --chown=user . .
+# Expose port used by Hugging Face (must be 7860)
+EXPOSE 7860
+# Run FastAPI with uvicorn
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

main.py ADDED Viewed

	@@ -0,0 +1,182 @@

+# main.py
+import os
+import json
+import pickle
+import numpy as np
+from typing import List
+from fastapi import FastAPI, Query, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from tensorflow.keras.models import load_model
+# ==========================
+# CONFIG
+# ==========================
+# Directory (relative to the process working directory) that holds one
+# sub-folder of artifacts per species: models/<species>/<species>_model.h5, etc.
+MODELS_BASE_DIR = "models"
+# These must match folder names under models/
+# Requests for any species not listed here are rejected by load_artifacts().
+SPECIES_LIST = [
+    "mackerel",
+    "sardinella",
+    "scomber",
+    "skipjack",
+    "tuna",
+]
+# Cache: species_id -> (model, scaler, meta, last_seq_scaled)
+# Filled lazily by load_artifacts() on first use; entries are never evicted.
+ARTIFACT_CACHE = {}
+def load_artifacts(species_id: str):
+    """
+    Load model, scaler, metadata, and last sequence for a given species.
+    Uses in-memory cache so subsequent calls are fast.
+    """
+    if species_id in ARTIFACT_CACHE:
+        return ARTIFACT_CACHE[species_id]
+    if species_id not in SPECIES_LIST:
+        raise ValueError(f"Unknown species '{species_id}'. Allowed: {SPECIES_LIST}")
+    base_dir = os.path.join(MODELS_BASE_DIR, species_id)
+    model_path = os.path.join(base_dir, f"{species_id}_model.h5")
+    scaler_path = os.path.join(base_dir, f"{species_id}_scaler.pkl")
+    meta_path = os.path.join(base_dir, f"{species_id}_metadata.json")
+    if not (os.path.exists(model_path) and os.path.exists(scaler_path) and os.path.exists(meta_path)):
+        raise FileNotFoundError(f"Artifacts not found for species '{species_id}' in {base_dir}")
+    # Load model
+    model = load_model(model_path, compile=False)
+    # Load scaler
+    with open(scaler_path, "rb") as f:
+        scaler = pickle.load(f)
+    # Load metadata
+    with open(meta_path, "r") as f:
+        meta = json.load(f)
+    seq_len = int(meta["sequence_length"])
+    last_seq_scaled = np.array(meta["last_sequence"]).reshape(1, seq_len, 2)
+    ARTIFACT_CACHE[species_id] = (model, scaler, meta, last_seq_scaled)
+    return ARTIFACT_CACHE[species_id]
+# ==========================
+# FASTAPI SETUP
+# ==========================
+# ASGI application object; the Dockerfile starts it as "main:app" under uvicorn.
+app = FastAPI(title="Multi-Species Fish Migration LSTM API")
+# CORS is fully open (any origin, method and header) so any browser frontend
+# can call the API directly.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],   # restrict in production
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# One predicted position for a single future calendar month.
+class PredictionPoint(BaseModel):
+    year: int          # calendar year of the predicted month
+    month: int         # calendar month, 1-12 (rolls over to the next year after 12)
+    latitude: float    # inverse-scaled model output, column 0
+    longitude: float   # inverse-scaled model output, column 1
+# Response body of GET /predict-migration.
+class PredictionResponse(BaseModel):
+    species: str                   # species ID exactly as requested
+    months_requested: int          # number of future months asked for
+    sequence_length_used: int      # input window length read from the species metadata
+    points: List[PredictionPoint]  # one entry per predicted month, in chronological order
+# ==========================
+# CORE PREDICTION LOGIC
+# ==========================
+def predict_future_months(species_id: str, n_months: int):
+    """
+    Predict n_months into the future for a given species.
+    Uses:
+      - last_year, last_month from metadata
+      - last_sequence (scaled) from metadata
+      - sequence_length from metadata
+    """
+    model, scaler, meta, last_seq_scaled = load_artifacts(species_id)
+    seq_len = int(meta["sequence_length"])
+    year = int(meta["last_year"])
+    month = int(meta["last_month"])
+    seq = last_seq_scaled.copy()
+    results = []
+    for _ in range(n_months):
+        # 1. predict next step (scaled)
+        pred_scaled = model.predict(seq, verbose=0)   # shape (1, 2)
+        # 2. convert back to real lat/lon
+        pred = scaler.inverse_transform(pred_scaled)[0]  # shape (2,)
+        # 3. advance calendar by one month
+        month += 1
+        if month > 12:
+            month = 1
+            year += 1
+        results.append(
+            {
+                "year": int(year),
+                "month": int(month),
+                "latitude": float(pred[0]),
+                "longitude": float(pred[1]),
+            }
+        )
+        # 4. slide window: drop oldest, add new prediction
+        new_seq = np.vstack([seq[0][1:], pred_scaled[0]])  # (seq_len, 2)
+        seq = new_seq.reshape(1, seq_len, 2)
+    return results, seq_len
+# ==========================
+# ENDPOINTS
+# ==========================
+@app.get("/predict-migration", response_model=PredictionResponse)
+def predict_migration(
+    species: str = Query("mackerel", description="Species ID (e.g., mackerel, sardinella)"),
+    months: int = Query(6, ge=1, le=24, description="Number of future months to predict"),
+):
+    """
+    Example:
+      GET /predict-migration?species=mackerel&months=12
+    """
+    try:
+        points, seq_len_used = predict_future_months(species, months)
+    except Exception as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    return PredictionResponse(
+        species=species,
+        months_requested=months,
+        sequence_length_used=seq_len_used,
+        points=[PredictionPoint(**p) for p in points],
+    )
+@app.get("/")
+def root():
+    return {
+        "message": "Multi-Species Fish Migration LSTM API is running",
+        "available_species": SPECIES_LIST,
+        "example": "/predict-migration?species=mackerel&months=12",
+    }

metadata.json ADDED Viewed

	@@ -0,0 +1,20 @@

+{
+  "species": "sardinella",
+  "last_year": 2012,
+  "last_month": 9,
+  "sequence_length": 3,
+  "last_sequence": [
+    [
+      0.5544831090300122,
+      0.3959002296999068
+    ],
+    [
+      0.5473025885600646,
+      0.39520740517616026
+    ],
+    [
+      0.4158765115337282,
+      0.3960623952468836
+    ]
+  ]
+}

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+fastapi
+uvicorn[standard]
+tensorflow-cpu
+numpy
+pandas
+scikit-learn

train.py ADDED Viewed

	@@ -0,0 +1,103 @@

+# train.py
+import os
+import json
+import pickle
+import numpy as np
+import pandas as pd
+from sklearn.preprocessing import MinMaxScaler
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import LSTM, Dense
+# -------- CONFIG --------
+# Maps each species ID to the CSV holding its time series. The ID is also used
+# as the output folder name under models/ and as the artifact filename prefix.
+SPECIES_FILES = {
+    "mackerel":   "migration_timeseries_mackerel.csv",
+    "sardinella": "migration_timeseries_sardinella.csv",
+    "scomber":    "migration_timeseries_scomber.csv",
+    "skipjack":   "migration_timeseries_skipjack.csv",
+    "tuna":       "migration_timeseries_tuna.csv",
+}
+# 🚨 This is ONLY a training hyperparameter (not exposed to frontend)
+# Number of consecutive rows fed to the LSTM to predict the next row; it is
+# written into each species' metadata as "sequence_length".
+SEQUENCE_LENGTH = 3
+def train_for_species(species_id: str, ts_csv: str):
+    if not os.path.exists(ts_csv):
+        print(f"[WARN] Timeseries CSV not found for {species_id}: {ts_csv}")
+        return
+    print(f"\n=== Training LSTM for {species_id} from {ts_csv} ===")
+    df = pd.read_csv(ts_csv)
+    df = df.sort_values(["year", "month"]).reset_index(drop=True)
+    required = {"year", "month", "decimalLatitude", "decimalLongitude"}
+    missing = required - set(df.columns)
+    if missing:
+        print(f"[ERROR] Missing columns {missing} in {ts_csv}")
+        return
+    coords = df[["decimalLatitude", "decimalLongitude"]].values
+    scaler = MinMaxScaler()
+    coords_scaled = scaler.fit_transform(coords)
+    X, y = [], []
+    for i in range(SEQUENCE_LENGTH, len(coords_scaled)):
+        X.append(coords_scaled[i - SEQUENCE_LENGTH:i])
+        y.append(coords_scaled[i])
+    X = np.array(X)
+    y = np.array(y)
+    if len(X) == 0:
+        print(f"[ERROR] Not enough data to train for {species_id}")
+        return
+    model = Sequential()
+    model.add(LSTM(64, activation="tanh", input_shape=(SEQUENCE_LENGTH, 2)))
+    model.add(Dense(32, activation="relu"))
+    model.add(Dense(2))
+    model.compile(optimizer="adam", loss="mse")
+    model.fit(X, y, epochs=50, batch_size=8, verbose=1)
+    out_dir = os.path.join("models", species_id)
+    os.makedirs(out_dir, exist_ok=True)
+    # 🔹 Species-specific filenames
+    model_path = os.path.join(out_dir, f"{species_id}_model.h5")
+    scaler_path = os.path.join(out_dir, f"{species_id}_scaler.pkl")
+    meta_path = os.path.join(out_dir, f"{species_id}_metadata.json")
+    model.save(model_path)
+    with open(scaler_path, "wb") as f:
+        pickle.dump(scaler, f)
+    # 👉 Store everything backend needs (no frontend involvement)
+    metadata = {
+        "species": species_id,
+        "sequence_length": SEQUENCE_LENGTH,                         # internal
+        "last_year": int(df["year"].iloc[-1]),
+        "last_month": int(df["month"].iloc[-1]),
+        "last_sequence": coords_scaled[-SEQUENCE_LENGTH:].tolist()  # internal
+    }
+    with open(meta_path, "w") as f:
+        json.dump(metadata, f, indent=2)
+    print(f"[OK] Saved {model_path}, {scaler_path}, {meta_path}")
+def main():
+    os.makedirs("models", exist_ok=True)
+    for species_id, ts_csv in SPECIES_FILES.items():
+        train_for_species(species_id, ts_csv)
+if __name__ == "__main__":
+    main()