# quanity_air/modules/train_local.py
# alen
# Upload 145 files
# 0640b0e verified
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import warnings
# Install a blanket "ignore" filter: no category or module is given, so it
# applies to every warning raised after this module is imported.
warnings.filterwarnings("ignore")
# Module-level list. train_() binds its own local list with the same name, so
# nothing visible in this file ever appends to this one.
model_point = []
class LSTMModel(nn.Module):
    """Stacked LSTM whose last time step feeds a linear output layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the hidden state in each LSTM layer.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values the linear layer emits per sequence.
    """

    def __init__(self, input_size=3, hidden_size=64, num_layers=2, output_size=1):
        super().__init__()
        # Attribute names are kept as ``lstm`` / ``fc`` because they become
        # the keys of the module's state_dict.
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Run the network on a batch-first input.

        Args:
            x: Tensor laid out as (batch, time, features), matching
                ``batch_first=True`` on the LSTM.

        Returns:
            The linear layer's output for the final time step, with dimension
            1 squeezed. That gives shape (batch,) when ``output_size`` is 1;
            for any other ``output_size`` the squeeze is a no-op and the
            shape stays (batch, output_size).
        """
        sequence_output, _state = self.lstm(x)
        last_step = sequence_output[:, -1, :]
        prediction = self.fc(last_step)
        return prediction.squeeze(1)
def _create_sequences(data, seq_len):
    """Cut a 2-D array into sliding windows plus the next row's target.

    Args:
        data: Array of shape (n_rows, n_features); column 0 is the target.
        seq_len: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(xs, ys)``: ``xs[k]`` holds rows ``k .. k + seq_len - 1`` and
        ``ys[k]`` is column 0 of row ``k + seq_len``.
    """
    windows = [data[end - seq_len:end] for end in range(seq_len, len(data))]
    targets = data[seq_len:, 0]
    return np.array(windows), np.array(targets)


def _to_numeric_features(df, columns):
    """Return ``df[columns]`` as a float array, accepting decimal commas.

    Values that still cannot be parsed after replacing ',' with '.' become
    NaN (``errors='coerce'``).
    """
    converted = {}
    for col in columns:
        series = df[col]
        if not pd.api.types.is_numeric_dtype(series):
            # Cast to str first: ``.str`` methods yield NaN for non-string
            # cells, which would silently drop numbers in a mixed column.
            series = series.astype(str).str.replace(',', '.', regex=False)
        converted[col] = pd.to_numeric(series, errors='coerce')
    return pd.DataFrame(converted, columns=list(columns)).to_numpy(dtype=float)


def train_(data_new, num_points=55, seq_len=24, epochs=100, batch_size=128,
           lr=0.01):
    """Train one LSTM per location ID to predict the next AQI_PM2.5 value.

    For every location ID in ``range(num_points)`` the matching rows are
    min-max scaled, cut into windows of ``seq_len`` rows, split 80/20 without
    shuffling, and used to fit a fresh ``LSTMModel`` with Adam and MSE loss.
    A few inverse-scaled test predictions are printed for inspection.

    NOTE: rows are consumed in the order they appear in ``data_new``; they are
    not sorted by ``Datetime``.

    Args:
        data_new: DataFrame with the columns ``'ID Vị Trí'``, ``'Datetime'``
            (format ``%H:%M %d/%m/%Y``), ``'AQI_PM2.5'``, ``'Kinh độ'`` and
            ``'Vĩ độ'``. Feature values may use a decimal comma.
        num_points: Number of location IDs to train, starting at 0.
        seq_len: Number of past rows in each input window.
        epochs: Number of training epochs per location.
        batch_size: Mini-batch size for training.
        lr: Adam learning rate.

    Returns:
        Tuple ``(model_point, scaler_list)``: the trained models and their
        fitted ``MinMaxScaler`` objects, both ordered by location ID.

    Raises:
        ValueError: If ``Datetime`` does not match the expected format, or if
            a location has too few rows to form one training and one test
            window.
        KeyError: If a required column is missing.
    """
    feature_columns = ["AQI_PM2.5", "Kinh độ", "Vĩ độ"]
    model_point = []
    scaler_list = []

    for points in range(num_points):
        # Copy so the column assignment below never targets a view of the
        # caller's frame.
        df = data_new[data_new['ID Vị Trí'] == points].copy()
        # Parsed only to validate the timestamp format; the result is not
        # used as a model feature.
        df["Datetime"] = pd.to_datetime(df["Datetime"], format="%H:%M %d/%m/%Y")
        print(df.columns.tolist())

        data = _to_numeric_features(df, feature_columns)
        # At least two windows are needed so the 80/20 split leaves one
        # sample on each side.
        if len(data) < seq_len + 2:
            raise ValueError(
                f"Location {points} has {len(data)} rows; at least "
                f"{seq_len + 2} are required for seq_len={seq_len}."
            )

        scaler = MinMaxScaler()
        data_scaled = scaler.fit_transform(data)

        model = LSTMModel()
        loss_fn = nn.MSELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

        X, y = _create_sequences(data_scaled, seq_len)
        # shuffle=False keeps the split in row order (first 80% train).
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, shuffle=False, test_size=0.2
        )

        X_train = torch.nan_to_num(torch.as_tensor(X_train, dtype=torch.float32))
        y_train = torch.nan_to_num(torch.as_tensor(y_train, dtype=torch.float32))
        # Test inputs get the same NaN -> 0 treatment as training inputs, so
        # one missing reading cannot turn a prediction into NaN.
        X_test = torch.nan_to_num(torch.as_tensor(X_test, dtype=torch.float32))
        # Test targets are left untouched so missing ground truth stays
        # visible as NaN in the printed comparison.
        y_test = torch.as_tensor(y_test, dtype=torch.float32)

        train_loader = DataLoader(
            TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=False
        )

        for epoch in range(epochs):
            model.train()
            for xb, yb in train_loader:
                optimizer.zero_grad()
                loss = loss_fn(model(xb), yb)
                loss.backward()
                optimizer.step()
            if epoch % 50 == 0:
                # Loss of the last mini-batch of this epoch.
                print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

        model.eval()
        with torch.no_grad():
            y_pred = model(X_test)
        print(y_pred.size(), y_test.size())

        # The scaler was fitted on every feature column, so pad the target
        # with zero columns before inverting and keep only column 0.
        n_features = data.shape[1]
        y_pred_extended = np.zeros((len(y_pred), n_features))
        y_test_extended = np.zeros((len(y_test), n_features))
        y_pred_extended[:, 0] = y_pred.numpy().reshape(-1)
        y_test_extended[:, 0] = y_test.numpy()
        y_pred_inv = scaler.inverse_transform(y_pred_extended)[:, 0]
        y_test_inv = scaler.inverse_transform(y_test_extended)[:, 0]

        model_point.append(model)
        scaler_list.append(scaler)

        print("\n📈 Dự đoán PM2.5:")
        for real, pred in zip(y_test_inv[:5], y_pred_inv[:5]):
            print(f"Thực tế: {real:.2f} - Dự đoán: {pred:.2f}")

    return model_point, scaler_list
# y_pred_inv