Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| import torch | |
| import torch.nn as nn | |
| from torch.utils.data import DataLoader, TensorDataset | |
| from sklearn.preprocessing import MinMaxScaler | |
| from sklearn.model_selection import train_test_split | |
| import warnings | |
| warnings.filterwarnings("ignore") | |
# Module-level accumulator for trained models.
# NOTE(review): shadowed by the local `model_point` created inside train_(),
# so this global is never populated — looks like dead state; confirm no other
# module imports it before removing.
| model_point = [] | |
class LSTMModel(nn.Module):
    """LSTM regressor: maps a (batch, seq_len, input_size) sequence to one scalar per sample."""

    def __init__(self, input_size=3, hidden_size=64, num_layers=2, output_size=1):
        super().__init__()
        # Stacked LSTM encoder followed by a linear head applied to the last time step.
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Encode the sequence and project the final hidden state to one value per sample."""
        sequence_out, _ = self.lstm(x)
        last_step = sequence_out[:, -1, :]   # (batch, hidden_size)
        prediction = self.fc(last_step)      # (batch, output_size=1)
        return prediction.squeeze(1)         # (batch,)
def train_(data_new, n_points=55, seq_len=24, epochs=100, batch_size=128, lr=0.01):
    """Train one LSTM per monitoring station and return the fitted models and scalers.

    Parameters
    ----------
    data_new : pandas.DataFrame
        Raw measurements with columns 'ID Vị Trí', 'Datetime', 'AQI_PM2.5',
        'Kinh độ', 'Vĩ độ'. 'Datetime' uses the "%H:%M %d/%m/%Y" format and
        numeric columns may use a decimal comma.
    n_points : int, optional
        Number of station IDs to process; IDs 0 .. n_points-1 are used
        (default 55, the previously hard-coded value).
    seq_len : int, optional
        Look-back window length fed to the LSTM (default 24 steps).
    epochs : int, optional
        Training epochs per station (default 100).
    batch_size : int, optional
        Mini-batch size (default 128).
    lr : float, optional
        Adam learning rate (default 0.01).

    Returns
    -------
    tuple[list, list]
        (models, scalers): one trained LSTMModel and one fitted MinMaxScaler
        per station, in station-ID order.
    """
    model_point = []
    scaler_list = []
    for point_id in range(n_points):
        # .copy() prevents pandas SettingWithCopy issues: the original mutated
        # a slice of data_new, which can warn or silently fail to assign.
        df = data_new[data_new['ID Vị Trí'] == point_id].copy()
        df["Datetime"] = pd.to_datetime(df["Datetime"], format="%H:%M %d/%m/%Y")
        # df.sort_values("Datetime", inplace=True)
        print(df.columns.tolist())
        # Normalize decimal commas ("12,5" -> "12.5") in raw string columns.
        for col in df.columns:
            if df[col].dtype == 'object':
                df[col] = df[col].str.replace(',', '.', regex=False)
        # Cast every non-timestamp column to float; unparsable cells become NaN.
        for col in df.columns:
            if col != 'Datetime':
                df[col] = pd.to_numeric(df[col], errors='coerce')
        data = df[["AQI_PM2.5", "Kinh độ", "Vĩ độ"]].values

        scaler = MinMaxScaler()
        data_scaled = scaler.fit_transform(data)

        # Build (window, next-PM2.5) supervised pairs from the scaled series.
        # (Original comment said "5 previous steps" but seq_len is 24.)
        X, y = _create_sequences(data_scaled, seq_len)

        # shuffle=False keeps the split chronological: the test set is strictly
        # after the training set, as required for time-series evaluation.
        X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.2)
        X_train = torch.nan_to_num(torch.FloatTensor(X_train))
        y_train = torch.nan_to_num(torch.FloatTensor(y_train))
        # Fix: the original cleaned only the training tensors, letting NaNs
        # leak into evaluation; clean the test tensors the same way.
        X_test = torch.nan_to_num(torch.FloatTensor(X_test))
        y_test = torch.nan_to_num(torch.FloatTensor(y_test))
        train_loader = DataLoader(TensorDataset(X_train, y_train),
                                  batch_size=batch_size, shuffle=False)

        model = LSTMModel()
        loss_fn = nn.MSELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

        # ---- training loop ----
        for epoch in range(epochs):
            model.train()
            for xb, yb in train_loader:
                output = model(xb)
                loss = loss_fn(output, yb)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            if epoch % 50 == 0:
                print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

        # ---- predict on the held-out tail ----
        model.eval()
        with torch.no_grad():
            y_pred = model(X_test)
        # The scaler was fitted on 3 columns, so 1-D predictions must be padded
        # back to (n, 3) before inverting; only column 0 (PM2.5) is meaningful.
        y_pred_inv = _invert_pm25(scaler, y_pred.numpy())
        y_test_inv = _invert_pm25(scaler, y_test.numpy())

        model_point.append(model)
        scaler_list.append(scaler)

        # ---- show a sample of the results ----
        print("\n📈 Dự đoán PM2.5:")
        for real, pred in zip(y_test_inv[:5], y_pred_inv[:5]):
            print(f"Thực tế: {real:.2f} - Dự đoán: {pred:.2f}")
    return model_point, scaler_list


def _create_sequences(data, seq_len):
    """Slide a seq_len window over `data`; the target is the next value of column 0 (PM2.5)."""
    xs, ys = [], []
    for i in range(seq_len, len(data)):
        xs.append(data[i - seq_len:i])
        ys.append(data[i][0])
    return np.array(xs), np.array(ys)


def _invert_pm25(scaler, values):
    """Undo MinMax scaling for a 1-D vector of scaled PM2.5 values (column 0 of the scaler)."""
    padded = np.zeros((len(values), 3))
    padded[:, 0] = values
    return scaler.inverse_transform(padded)[:, 0]