import warnings

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset

warnings.filterwarnings("ignore")

# Kept for backward compatibility: train_() builds and returns its own
# per-location list; this module-level name is unused by train_() itself.
model_point = []


class LSTMModel(nn.Module):
    """LSTM regressor over (PM2.5, longitude, latitude) sequences.

    Consumes a window of feature vectors and predicts a single scalar
    (scaled PM2.5) for the next time step.
    """

    def __init__(self, input_size=3, hidden_size=64, num_layers=2, output_size=1):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # x: (batch, seq_len, input_size)
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])  # last time step -> (batch, output_size=1)
        return out.squeeze(1)         # -> (batch,)


def _create_sequences(data, seq_len):
    """Build sliding windows over `data`.

    X[i] = data[i-seq_len:i] (the previous `seq_len` rows),
    y[i] = data[i][0]        (the PM2.5 column of the next row).

    Hoisted to module level — the original redefined this closure on every
    iteration of the 55-location loop.
    """
    xs, ys = [], []
    for i in range(seq_len, len(data)):
        xs.append(data[i - seq_len:i])
        ys.append(data[i][0])
    return np.array(xs), np.array(ys)


def train_(data_new):
    """Train one LSTM per monitoring location (IDs 0..54).

    Parameters
    ----------
    data_new : pd.DataFrame
        Expected columns (from the visible code): 'ID Vị Trí', 'Datetime'
        ("%H:%M %d/%m/%Y" strings), 'AQI_PM2.5', 'Kinh độ' (longitude),
        'Vĩ độ' (latitude). Numeric columns may use comma decimal separators.

    Returns
    -------
    (list[LSTMModel], list[MinMaxScaler])
        One trained model and its fitted scaler per location, in ID order.
    """
    model_point = []
    scaler_list = []

    SEQ_LEN = 24       # use the previous 24 steps to predict the next one
    BATCH_SIZE = 128
    EPOCHS = 100

    for point_id in range(55):
        # .copy() — the original assigned into a boolean-mask view of
        # data_new, which raises SettingWithCopyWarning and can silently
        # fail to write.
        df = data_new[data_new['ID Vị Trí'] == point_id].copy()
        df["Datetime"] = pd.to_datetime(df["Datetime"], format="%H:%M %d/%m/%Y")
        # df.sort_values("Datetime", inplace=True)
        print(df.columns.tolist())

        # Normalize comma decimal separators in string columns, then coerce
        # everything except the timestamp to numeric (invalid -> NaN).
        for col in df.columns:
            if df[col].dtype == 'object':
                df[col] = df[col].str.replace(',', '.', regex=False)
        for col in df.columns:
            if col != 'Datetime':
                df[col] = pd.to_numeric(df[col], errors='coerce')

        data = df[["AQI_PM2.5", "Kinh độ", "Vĩ độ"]].values

        scaler = MinMaxScaler()
        data_scaled = scaler.fit_transform(data)

        model = LSTMModel()
        loss_fn = nn.MSELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

        X, y = _create_sequences(data_scaled, SEQ_LEN)

        # Time series: keep chronological order, last 20% is the test set.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, shuffle=False, test_size=0.2
        )
        X_train = torch.nan_to_num(torch.FloatTensor(X_train))
        y_train = torch.nan_to_num(torch.FloatTensor(y_train))
        # BUG FIX: the original sanitized only the training tensors, so any
        # NaN produced by errors='coerce' in the test split propagated into
        # model(X_test) and the inverse-transformed metrics.
        X_test = torch.nan_to_num(torch.FloatTensor(X_test))
        y_test = torch.nan_to_num(torch.FloatTensor(y_test))

        train_loader = DataLoader(
            TensorDataset(X_train, y_train),
            batch_size=BATCH_SIZE,
            shuffle=False,
        )
        # NOTE(review): the original also built a test DataLoader that was
        # never consumed; evaluation below runs on X_test directly.

        # ---- Training loop ----
        for epoch in range(EPOCHS):
            model.train()
            for xb, yb in train_loader:
                output = model(xb)
                loss = loss_fn(output, yb)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            if epoch % 50 == 0:
                print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

        # ---- Evaluation on the held-out tail ----
        model.eval()
        with torch.no_grad():
            y_pred = model(X_test)

        # The scaler was fitted on 3 columns; embed the 1-D predictions in a
        # zero-padded 3-column array so inverse_transform recovers raw PM2.5.
        y_pred_extended = np.zeros((len(y_pred), 3))
        y_test_extended = np.zeros((len(y_test), 3))
        print(y_pred.size(), y_test.size())
        y_pred_extended[:, 0] = y_pred.squeeze()  # PM2.5 predictions only
        y_test_extended[:, 0] = y_test
        y_pred_inv = scaler.inverse_transform(y_pred_extended)[:, 0]
        y_test_inv = scaler.inverse_transform(y_test_extended)[:, 0]

        model_point.append(model)
        scaler_list.append(scaler)

        # ---- Show a sample of the results ----
        print("\n📈 Dự đoán PM2.5:")
        for real, pred in zip(y_test_inv[:5], y_pred_inv[:5]):
            print(f"Thực tế: {real:.2f} - Dự đoán: {pred:.2f}")

    return model_point, scaler_list