Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| import torch | |
| import torch.nn as nn | |
| from torch.utils.data import DataLoader, TensorDataset | |
| from sklearn.preprocessing import MinMaxScaler | |
| from sklearn.model_selection import train_test_split | |
| import warnings | |
| warnings.filterwarnings("ignore") | |
# Module-level accumulator for trained models.
# NOTE(review): shadowed by the local `model_point` created inside train_(),
# so this global is never populated — looks like dead state; confirm no other
# module imports it before removing.
| model_point = [] | |
class LSTMModel(nn.Module):
    """LSTM regressor: maps a (batch, seq_len, input_size) sequence to one scalar per sample."""

    def __init__(self, input_size=3, hidden_size=64, num_layers=2, output_size=1):
        super().__init__()
        # Stacked LSTM encoder followed by a linear head applied to the last time step.
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Encode the sequence and project the final hidden state to one value per sample."""
        sequence_out, _ = self.lstm(x)
        last_step = sequence_out[:, -1, :]   # (batch, hidden_size)
        prediction = self.fc(last_step)      # (batch, output_size=1)
        return prediction.squeeze(1)         # (batch,)
def train_(data_new, n_points=55, seq_len=24, epochs=100, batch_size=128, lr=0.01):
    """Train one LSTM per monitoring station and return the fitted models and scalers.

    Parameters
    ----------
    data_new : pandas.DataFrame
        Raw measurements with columns 'ID Vị Trí', 'Datetime', 'AQI_PM2.5',
        'Kinh độ', 'Vĩ độ'. 'Datetime' uses the "%H:%M %d/%m/%Y" format and
        numeric columns may use a decimal comma.
    n_points : int, optional
        Number of station IDs to process; IDs 0 .. n_points-1 are used
        (default 55, the previously hard-coded value).
    seq_len : int, optional
        Look-back window length fed to the LSTM (default 24 steps).
    epochs : int, optional
        Training epochs per station (default 100).
    batch_size : int, optional
        Mini-batch size (default 128).
    lr : float, optional
        Adam learning rate (default 0.01).

    Returns
    -------
    tuple[list, list]
        (models, scalers): one trained LSTMModel and one fitted MinMaxScaler
        per station, in station-ID order.
    """
    model_point = []
    scaler_list = []
    for point_id in range(n_points):
        # .copy() prevents pandas SettingWithCopy issues: the original mutated
        # a slice of data_new, which can warn or silently fail to assign.
        df = data_new[data_new['ID Vị Trí'] == point_id].copy()
        df["Datetime"] = pd.to_datetime(df["Datetime"], format="%H:%M %d/%m/%Y")
        # df.sort_values("Datetime", inplace=True)
        print(df.columns.tolist())
        # Normalize decimal commas ("12,5" -> "12.5") in raw string columns.
        for col in df.columns:
            if df[col].dtype == 'object':
                df[col] = df[col].str.replace(',', '.', regex=False)
        # Cast every non-timestamp column to float; unparsable cells become NaN.
        for col in df.columns:
            if col != 'Datetime':
                df[col] = pd.to_numeric(df[col], errors='coerce')
        data = df[["AQI_PM2.5", "Kinh độ", "Vĩ độ"]].values

        scaler = MinMaxScaler()
        data_scaled = scaler.fit_transform(data)

        # Build (window, next-PM2.5) supervised pairs from the scaled series.
        # (Original comment said "5 previous steps" but seq_len is 24.)
        X, y = _create_sequences(data_scaled, seq_len)

        # shuffle=False keeps the split chronological: the test set is strictly
        # after the training set, as required for time-series evaluation.
        X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.2)
        X_train = torch.nan_to_num(torch.FloatTensor(X_train))
        y_train = torch.nan_to_num(torch.FloatTensor(y_train))
        # Fix: the original cleaned only the training tensors, letting NaNs
        # leak into evaluation; clean the test tensors the same way.
        X_test = torch.nan_to_num(torch.FloatTensor(X_test))
        y_test = torch.nan_to_num(torch.FloatTensor(y_test))
        train_loader = DataLoader(TensorDataset(X_train, y_train),
                                  batch_size=batch_size, shuffle=False)

        model = LSTMModel()
        loss_fn = nn.MSELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

        # ---- training loop ----
        for epoch in range(epochs):
            model.train()
            for xb, yb in train_loader:
                output = model(xb)
                loss = loss_fn(output, yb)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            if epoch % 50 == 0:
                print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

        # ---- predict on the held-out tail ----
        model.eval()
        with torch.no_grad():
            y_pred = model(X_test)
        # The scaler was fitted on 3 columns, so 1-D predictions must be padded
        # back to (n, 3) before inverting; only column 0 (PM2.5) is meaningful.
        y_pred_inv = _invert_pm25(scaler, y_pred.numpy())
        y_test_inv = _invert_pm25(scaler, y_test.numpy())

        model_point.append(model)
        scaler_list.append(scaler)

        # ---- show a sample of the results ----
        print("\n📈 Dự đoán PM2.5:")
        for real, pred in zip(y_test_inv[:5], y_pred_inv[:5]):
            print(f"Thực tế: {real:.2f} - Dự đoán: {pred:.2f}")
    return model_point, scaler_list


def _create_sequences(data, seq_len):
    """Slide a seq_len window over `data`; the target is the next value of column 0 (PM2.5)."""
    xs, ys = [], []
    for i in range(seq_len, len(data)):
        xs.append(data[i - seq_len:i])
        ys.append(data[i][0])
    return np.array(xs), np.array(ys)


def _invert_pm25(scaler, values):
    """Undo MinMax scaling for a 1-D vector of scaled PM2.5 values (column 0 of the scaler)."""
    padded = np.zeros((len(values), 3))
    padded[:, 0] = values
    return scaler.inverse_transform(padded)[:, 0]