import warnings

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset

warnings.filterwarnings("ignore")

# Kept for backward compatibility: train_() builds and returns its own
# per-location list; this module-level name is unused by train_() itself.
model_point = []


class LSTMModel(nn.Module):
    """LSTM regressor over (PM2.5, longitude, latitude) sequences.

    Consumes a window of feature vectors and predicts a single scalar
    (scaled PM2.5) for the next time step.
    """

    def __init__(self, input_size=3, hidden_size=64, num_layers=2, output_size=1):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # x: (batch, seq_len, input_size)
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])  # last time step -> (batch, output_size=1)
        return out.squeeze(1)         # -> (batch,)


def _create_sequences(data, seq_len):
    """Build sliding windows over `data`.

    X[i] = data[i-seq_len:i] (the previous `seq_len` rows),
    y[i] = data[i][0]        (the PM2.5 column of the next row).

    Hoisted to module level — the original redefined this closure on every
    iteration of the 55-location loop.
    """
    xs, ys = [], []
    for i in range(seq_len, len(data)):
        xs.append(data[i - seq_len:i])
        ys.append(data[i][0])
    return np.array(xs), np.array(ys)


def train_(data_new):
    """Train one LSTM per monitoring location (IDs 0..54).

    Parameters
    ----------
    data_new : pd.DataFrame
        Expected columns (from the visible code): 'ID Vị Trí', 'Datetime'
        ("%H:%M %d/%m/%Y" strings), 'AQI_PM2.5', 'Kinh độ' (longitude),
        'Vĩ độ' (latitude). Numeric columns may use comma decimal separators.

    Returns
    -------
    (list[LSTMModel], list[MinMaxScaler])
        One trained model and its fitted scaler per location, in ID order.
    """
    model_point = []
    scaler_list = []

    SEQ_LEN = 24       # use the previous 24 steps to predict the next one
    BATCH_SIZE = 128
    EPOCHS = 100

    for point_id in range(55):
        # .copy() — the original assigned into a boolean-mask view of
        # data_new, which raises SettingWithCopyWarning and can silently
        # fail to write.
        df = data_new[data_new['ID Vị Trí'] == point_id].copy()
        df["Datetime"] = pd.to_datetime(df["Datetime"], format="%H:%M %d/%m/%Y")
        # df.sort_values("Datetime", inplace=True)
        print(df.columns.tolist())

        # Normalize comma decimal separators in string columns, then coerce
        # everything except the timestamp to numeric (invalid -> NaN).
        for col in df.columns:
            if df[col].dtype == 'object':
                df[col] = df[col].str.replace(',', '.', regex=False)
        for col in df.columns:
            if col != 'Datetime':
                df[col] = pd.to_numeric(df[col], errors='coerce')

        data = df[["AQI_PM2.5", "Kinh độ", "Vĩ độ"]].values

        scaler = MinMaxScaler()
        data_scaled = scaler.fit_transform(data)

        model = LSTMModel()
        loss_fn = nn.MSELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

        X, y = _create_sequences(data_scaled, SEQ_LEN)

        # Time series: keep chronological order, last 20% is the test set.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, shuffle=False, test_size=0.2
        )
        X_train = torch.nan_to_num(torch.FloatTensor(X_train))
        y_train = torch.nan_to_num(torch.FloatTensor(y_train))
        # BUG FIX: the original sanitized only the training tensors, so any
        # NaN produced by errors='coerce' in the test split propagated into
        # model(X_test) and the inverse-transformed metrics.
        X_test = torch.nan_to_num(torch.FloatTensor(X_test))
        y_test = torch.nan_to_num(torch.FloatTensor(y_test))

        train_loader = DataLoader(
            TensorDataset(X_train, y_train),
            batch_size=BATCH_SIZE,
            shuffle=False,
        )
        # NOTE(review): the original also built a test DataLoader that was
        # never consumed; evaluation below runs on X_test directly.

        # ---- Training loop ----
        for epoch in range(EPOCHS):
            model.train()
            for xb, yb in train_loader:
                output = model(xb)
                loss = loss_fn(output, yb)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            if epoch % 50 == 0:
                print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

        # ---- Evaluation on the held-out tail ----
        model.eval()
        with torch.no_grad():
            y_pred = model(X_test)

        # The scaler was fitted on 3 columns; embed the 1-D predictions in a
        # zero-padded 3-column array so inverse_transform recovers raw PM2.5.
        y_pred_extended = np.zeros((len(y_pred), 3))
        y_test_extended = np.zeros((len(y_test), 3))
        print(y_pred.size(), y_test.size())
        y_pred_extended[:, 0] = y_pred.squeeze()  # PM2.5 predictions only
        y_test_extended[:, 0] = y_test
        y_pred_inv = scaler.inverse_transform(y_pred_extended)[:, 0]
        y_test_inv = scaler.inverse_transform(y_test_extended)[:, 0]

        model_point.append(model)
        scaler_list.append(scaler)

        # ---- Show a sample of the results ----
        print("\n📈 Dự đoán PM2.5:")
        for real, pred in zip(y_test_inv[:5], y_pred_inv[:5]):
            print(f"Thực tế: {real:.2f} - Dự đoán: {pred:.2f}")

    return model_point, scaler_list