Spaces:

NeerajCodz
/

aiBatteryLifeCycle

Running

App Files Files Community

aiBatteryLifeCycle / src /models /deep /lstm.py

NeerajCodz

feat: full project — ML simulation, dashboard UI, models on HF Hub

f381be8 6 days ago

raw

history blame contribute delete

9.4 kB

	"""
	src.models.deep.lstm
	====================
	LSTM / GRU family models for battery lifecycle sequence prediction.

	Architectures:
	1. Vanilla LSTM — 2-layer, unidirectional
	2. Bidirectional LSTM — 2-layer
	3. GRU — 2-layer
	4. Stacked LSTM with Additive Attention — 3-layer + attention

	All models accept input shape ``(batch, seq_len, n_features)`` and
	output a single scalar prediction (SOH or RUL).
	"""

	from __future__ import annotations

	import math

	import torch
	import torch.nn as nn
	import torch.nn.functional as F


	# ── 1. Vanilla LSTM ─────────────────────────────────────────────────────────
	class VanillaLSTM(nn.Module):
	    """Unidirectional stacked LSTM regressor.

	    The hidden state of the top layer after the final time step is passed
	    through dropout and a linear layer, giving one scalar per sequence.
	    """

	    def __init__(self, input_dim: int, hidden_dim: int = 128,
	                 n_layers: int = 2, dropout: float = 0.2):
	        super().__init__()
	        # The recurrent dropout argument is only passed for stacked
	        # networks; a single-layer LSTM is built with 0.
	        inter_layer_dropout = 0 if n_layers <= 1 else dropout
	        self.lstm = nn.LSTM(
	            input_size=input_dim,
	            hidden_size=hidden_dim,
	            num_layers=n_layers,
	            batch_first=True,
	            dropout=inter_layer_dropout,
	        )
	        self.dropout = nn.Dropout(dropout)
	        self.fc = nn.Linear(hidden_dim, 1)

	    def forward(self, x: torch.Tensor) -> torch.Tensor:
	        """Map ``x`` of shape (B, T, F) to predictions of shape (B,)."""
	        _, (hidden, _) = self.lstm(x)
	        top_layer = hidden[-1]  # (B, H) — final state of the last layer
	        prediction = self.fc(self.dropout(top_layer))  # (B, 1)
	        return prediction.squeeze(-1)


	# ── 2. Bidirectional LSTM ────────────────────────────────────────────────────
	class BidirectionalLSTM(nn.Module):
	    """Stacked bidirectional LSTM regressor.

	    The final hidden states of the top layer's forward and backward passes
	    are concatenated, regularised with dropout and projected to a scalar.
	    """

	    def __init__(self, input_dim: int, hidden_dim: int = 128,
	                 n_layers: int = 2, dropout: float = 0.2):
	        super().__init__()
	        # Recurrent dropout is only requested when layers are stacked.
	        inter_layer_dropout = 0 if n_layers <= 1 else dropout
	        self.lstm = nn.LSTM(
	            input_size=input_dim,
	            hidden_size=hidden_dim,
	            num_layers=n_layers,
	            batch_first=True,
	            bidirectional=True,
	            dropout=inter_layer_dropout,
	        )
	        self.dropout = nn.Dropout(dropout)
	        # The head sees both directions side by side, hence 2 * hidden_dim.
	        self.fc = nn.Linear(2 * hidden_dim, 1)

	    def forward(self, x: torch.Tensor) -> torch.Tensor:
	        """Map ``x`` of shape (B, T, F) to predictions of shape (B,)."""
	        _, (hidden, _) = self.lstm(x)
	        forward_state = hidden[-2]   # last layer, forward direction
	        backward_state = hidden[-1]  # last layer, backward direction
	        joined = torch.cat((forward_state, backward_state), dim=-1)
	        return self.fc(self.dropout(joined)).squeeze(-1)


	# ── 3. GRU ───────────────────────────────────────────────────────────────────
	class GRUModel(nn.Module):
	    """Stacked GRU regressor with a single linear output unit."""

	    def __init__(self, input_dim: int, hidden_dim: int = 128,
	                 n_layers: int = 2, dropout: float = 0.2):
	        super().__init__()
	        # Recurrent dropout is only requested when layers are stacked.
	        inter_layer_dropout = 0 if n_layers <= 1 else dropout
	        self.gru = nn.GRU(
	            input_size=input_dim,
	            hidden_size=hidden_dim,
	            num_layers=n_layers,
	            batch_first=True,
	            dropout=inter_layer_dropout,
	        )
	        self.dropout = nn.Dropout(dropout)
	        self.fc = nn.Linear(hidden_dim, 1)

	    def forward(self, x: torch.Tensor) -> torch.Tensor:
	        """Map ``x`` of shape (B, T, F) to predictions of shape (B,)."""
	        _, hidden = self.gru(x)
	        top_layer = hidden[-1]  # final hidden state of the last layer
	        return self.fc(self.dropout(top_layer)).squeeze(-1)


	# ── 4. Stacked LSTM with Additive Attention ─────────────────────────────────
	class AdditiveAttention(nn.Module):
	    """Additive (Bahdanau-style) attention pooling over a sequence of states.

	    Each time step is scored by ``v(tanh(W h_t))``; the scores are
	    soft-maxed over time and used to form a weighted sum of the inputs.
	    """

	    def __init__(self, hidden_dim: int):
	        super().__init__()
	        self.W = nn.Linear(hidden_dim, hidden_dim, bias=False)
	        self.v = nn.Linear(hidden_dim, 1, bias=False)

	    def forward(self, lstm_outputs: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
	        """Pool the sequence into one context vector per batch item.

	        Parameters
	        ----------
	        lstm_outputs : (B, T, H)

	        Returns
	        -------
	        context : (B, H)
	        attn_weights : (B, T)
	        """
	        projected = self.W(lstm_outputs).tanh()       # (B, T, H)
	        raw_scores = self.v(projected).squeeze(-1)     # (B, T)
	        attn_weights = raw_scores.softmax(dim=-1)      # (B, T), sums to 1 over T
	        # (B, 1, T) @ (B, T, H) -> (B, 1, H): weighted sum over time steps.
	        pooled = torch.bmm(attn_weights.unsqueeze(1), lstm_outputs)
	        context = pooled.squeeze(1)                    # (B, H)
	        return context, attn_weights


	class AttentionLSTM(nn.Module):
	    """Stacked LSTM whose per-step outputs are pooled by additive attention.

	    The attention context vector goes through dropout and a two-layer MLP
	    head that emits one scalar per sequence.

	    Attributes
	    ----------
	    attn_weights : torch.Tensor or None
	        Attention weights of shape (B, T) from the most recent forward
	        pass; ``None`` until ``forward`` has been called once. The tensor
	        is stored as produced (not detached).
	    """

	    def __init__(self, input_dim: int, hidden_dim: int = 128,
	                 n_layers: int = 3, dropout: float = 0.2):
	        super().__init__()
	        self.lstm = nn.LSTM(
	            input_dim, hidden_dim, num_layers=n_layers,
	            batch_first=True, dropout=dropout if n_layers > 1 else 0,
	        )
	        self.attention = AdditiveAttention(hidden_dim)
	        self.dropout = nn.Dropout(dropout)
	        self.fc = nn.Sequential(
	            nn.Linear(hidden_dim, hidden_dim // 2),
	            nn.ReLU(),
	            nn.Dropout(dropout),
	            nn.Linear(hidden_dim // 2, 1),
	        )
	        # Declared here so the attribute can be inspected before the first
	        # forward pass instead of raising AttributeError.
	        self.attn_weights = None

	    def forward(self, x: torch.Tensor) -> torch.Tensor:
	        """Map ``x`` of shape (B, T, F) to predictions of shape (B,).

	        As a side effect the attention weights of this call are cached on
	        ``self.attn_weights`` for later inspection.
	        """
	        lstm_out, _ = self.lstm(x)  # (B, T, H)
	        context, self.attn_weights = self.attention(lstm_out)  # (B, H)
	        context = self.dropout(context)
	        return self.fc(context).squeeze(-1)  # (B,)


	# ── MC Dropout inference ────────────────────────────────────────────────────
	def mc_dropout_predict(
	    model: nn.Module,
	    x: torch.Tensor,
	    n_samples: int = 50,
	) -> tuple[torch.Tensor, torch.Tensor]:
	    """Make predictions with MC Dropout for uncertainty estimation.

	    The model is switched to training mode so that dropout stays active,
	    evaluated ``n_samples`` times on the same input without gradient
	    tracking, and then put back into whatever train/eval mode each of its
	    sub-modules had before the call (also when a forward pass raises).

	    Parameters
	    ----------
	    model : nn.Module
	        Model with Dropout layers.
	    x : torch.Tensor
	        Input batch.
	    n_samples : int
	        Number of stochastic forward passes (must be >= 1). With a single
	        sample the returned standard deviation is not meaningful.

	    Returns
	    -------
	    mean : (B,)
	        Mean prediction across samples.
	    std : (B,)
	        Standard deviation (uncertainty).

	    Raises
	    ------
	    ValueError
	        If ``n_samples`` is smaller than 1.
	    """
	    if n_samples < 1:
	        raise ValueError(f"n_samples must be >= 1, got {n_samples}")

	    # Snapshot every sub-module's mode so the caller's state is restored
	    # exactly, rather than unconditionally forcing eval mode afterwards.
	    previous_modes = [(module, module.training) for module in model.modules()]
	    model.train()  # Enable dropout
	    try:
	        # Inference only: skip autograd bookkeeping for all passes.
	        with torch.no_grad():
	            preds = torch.stack([model(x) for _ in range(n_samples)])  # (S, B)
	    finally:
	        for module, was_training in previous_modes:
	            module.training = was_training
	    return preds.mean(dim=0), preds.std(dim=0)


	# ── Training utilities ──────────────────────────────────────────────────────
	class EarlyStopping:
	    """Patience-based stopping criterion that remembers the best weights.

	    A validation loss counts as an improvement only when it undercuts the
	    best loss seen so far by more than ``min_delta``. On every improvement
	    a CPU copy of the model's ``state_dict`` is kept.
	    """

	    def __init__(self, patience: int = 20, min_delta: float = 0.0):
	        self.patience = patience
	        self.min_delta = min_delta
	        self.counter = 0                 # consecutive steps without improvement
	        self.best_loss = float("inf")
	        self.best_state = None           # snapshot taken at the best step

	    def step(self, val_loss: float, model: nn.Module) -> bool:
	        """Record ``val_loss``; return True once patience is exhausted."""
	        improved = val_loss < self.best_loss - self.min_delta
	        if not improved:
	            self.counter += 1
	            return self.counter >= self.patience

	        self.best_loss = val_loss
	        self.counter = 0
	        snapshot = {}
	        for name, tensor in model.state_dict().items():
	            # Clone on CPU so later optimiser steps cannot alter the copy.
	            snapshot[name] = tensor.cpu().clone()
	        self.best_state = snapshot
	        return False

	    def load_best(self, model: nn.Module) -> None:
	        """Restore the best snapshot into ``model``; no-op if none exists."""
	        if self.best_state is None:
	            return
	        model.load_state_dict(self.best_state)


	def train_loop(
	    model: nn.Module,
	    train_loader: torch.utils.data.DataLoader,
	    val_loader: torch.utils.data.DataLoader,
	    *,
	    max_epochs: int = 150,
	    lr: float = 1e-3,
	    patience: int = 20,
	    device: str | torch.device = "cpu",
	    grad_clip: float = 1.0,
	) -> dict:
	    """Generic training loop for all LSTM/GRU family models.

	    Trains with Adam, a cosine-annealed learning rate and MAE loss, and
	    stops early when the validation loss has not improved for ``patience``
	    consecutive epochs. The best weights seen are loaded back into
	    ``model`` before returning.

	    Parameters
	    ----------
	    model : nn.Module
	        Model to train; moved to ``device``.
	    train_loader, val_loader : DataLoader
	        Iterables yielding ``(inputs, targets)`` batches.
	    max_epochs : int
	        Upper bound on the number of epochs (also the cosine period).
	    lr : float
	        Initial Adam learning rate.
	    patience : int
	        Epochs without validation improvement tolerated before stopping.
	    device : str or torch.device
	        Device on which training runs.
	    grad_clip : float
	        Max gradient norm; clipping is skipped when this is <= 0.

	    Returns
	    -------
	    dict
	        ``train_losses`` and ``val_losses`` (per-epoch mean of batch
	        losses), ``best_epoch`` (1-based epoch of the best validation
	        loss, 0 if it never improved) and ``epochs_trained``.
	    """
	    model = model.to(device)
	    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
	    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=max_epochs)
	    criterion = nn.L1Loss()  # MAE
	    early_stop = EarlyStopping(patience=patience)

	    train_losses, val_losses = [], []

	    for _epoch in range(1, max_epochs + 1):
	        # Train
	        model.train()
	        epoch_loss = 0.0
	        n_batches = 0
	        for xb, yb in train_loader:
	            xb, yb = xb.to(device), yb.to(device)
	            optimizer.zero_grad()
	            pred = model(xb)
	            loss = criterion(pred, _align_target(pred, yb))
	            loss.backward()
	            if grad_clip > 0:
	                nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
	            optimizer.step()
	            epoch_loss += loss.item()
	            n_batches += 1
	        # max(..., 1) guards against an empty loader.
	        train_losses.append(epoch_loss / max(n_batches, 1))

	        # Validate
	        model.eval()
	        val_loss = 0.0
	        n_val = 0
	        with torch.no_grad():
	            for xb, yb in val_loader:
	                xb, yb = xb.to(device), yb.to(device)
	                pred = model(xb)
	                val_loss += criterion(pred, _align_target(pred, yb)).item()
	                n_val += 1
	        val_losses.append(val_loss / max(n_val, 1))

	        scheduler.step()

	        if early_stop.step(val_losses[-1], model):
	            break

	    early_stop.load_best(model)
	    return {
	        "train_losses": train_losses,
	        "val_losses": val_losses,
	        # counter == epochs elapsed since the best validation loss.
	        "best_epoch": len(train_losses) - early_stop.counter,
	        "epochs_trained": len(train_losses),
	    }


	def _align_target(pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
	    """Give ``target`` the shape of ``pred`` when both hold the same number of elements.

	    Prevents e.g. a (B, 1) target from silently broadcasting against a
	    (B,) prediction into a (B, B) loss matrix. Targets whose element
	    count differs are returned untouched.
	    """
	    if target.shape != pred.shape and target.numel() == pred.numel():
	        return target.reshape(pred.shape)
	    return target