"""Collect A-share daily bars via Baostock, filter limit-up sessions, and
prepare a NeuralProphet/LSTM hybrid model for price forecasting.

NOTE(review): the original source arrived with all lines collapsed and
truncated; the training-loop body of ``fit`` and the ranking logic of
``predict_stock_codes`` were missing and are marked with TODOs below.
"""

import os

import pandas as pd
import numpy as np
import baostock as bs
import torch
import torch.nn as nn
from torch.nn import LSTM
from torch.optim import Adam
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    mean_absolute_error,
)
from neuralprophet import NeuralProphet, set_log_level


class CustomModel(nn.Module):
    """Hybrid wrapper combining a NeuralProphet forecaster with an LSTM head.

    NOTE(review): NeuralProphet is not an ``nn.Module`` and cannot be called
    on a raw tensor, so ``forward`` as written will fail at runtime; it is
    kept only to preserve the original structure. Prediction should go
    through :meth:`predict`, which uses NeuralProphet's own DataFrame API.
    """

    def __init__(self):
        super().__init__()
        self.neural_prophet = NeuralProphet(
            n_forecasts=1,
            n_lags=30,
            n_changepoints=10,
            changepoints_range=0.8,
            learning_rate=1e-3,
            optimizer=Adam,
        )
        self.lstm = LSTM(input_size=1, hidden_size=128, num_layers=1,
                         batch_first=True)

    def forward(self, x):
        # NOTE(review): broken in the original — NeuralProphet objects are
        # not callable on tensors. Left as-is pending a real integration.
        x = self.neural_prophet(x)
        x = self.lstm(x)
        return x

    def predict(self, df):
        """Forecast one step ahead with the wrapped NeuralProphet model.

        Args:
            df: History DataFrame in NeuralProphet's ``ds``/``y`` format.

        Returns:
            numpy array of one-step-ahead predictions (``yhat1`` column).
        """
        # Build the one-period future frame, then predict on it.
        future = self.neural_prophet.make_future_dataframe(df, periods=1)
        forecast = self.neural_prophet.predict(future)
        return forecast['yhat1'].values


def fit(model, train_data, epochs, batch_size, validation_data):
    """Train ``model`` on ``train_data``, validating on ``validation_data``.

    Args:
        model: The CustomModel instance to train.
        train_data: Training DataFrame.
        epochs: Number of passes over the training data.
        batch_size: Mini-batch size.
        validation_data: Held-out DataFrame for per-epoch validation.

    Raises:
        NotImplementedError: always — see note below.

    NOTE(review): the training-loop body was missing from the (truncated)
    source file; raising here is safer than inventing one silently.
    """
    # TODO: restore the original training loop.
    raise NotImplementedError(
        "Training loop body was missing from the original source."
    )


def predict_stock_codes(data_df):
    """Scale OHLCV features and rank stocks, returning the top-5 candidates.

    Args:
        data_df: DataFrame with 'open', 'high', 'low', 'close', 'volume'
            columns (mutated in place by the scaling step).

    Returns:
        List of the top-5 stock codes. Currently a placeholder — see note.
    """
    feature_cols = ['open', 'high', 'low', 'close', 'volume']
    # Scale the feature columns to [0, 1].
    scaler = MinMaxScaler()
    data_df[feature_cols] = scaler.fit_transform(data_df[feature_cols])
    # NOTE(review): the ranking logic was elided in the source ("#...") and
    # `top_5_stocks` was referenced without being defined (NameError).
    # TODO: restore the elided logic that computes top_5_stocks.
    top_5_stocks = []
    return top_5_stocks


# --- Data collection --------------------------------------------------------
bs.login()

# NOTE(review): "sz.000001" is Ping An Bank on the Shenzhen exchange; the
# Shanghai Composite Index would be "sh.000001". Confirm which was intended.
data = bs.query_history_k_data_plus(
    "sz.000001",
    "date,open,high,low,close,volume",
    start_date="2005-05-30",
    end_date="2024-01-31",
    frequency="d",
)

# Drain the Baostock ResultData cursor into a pandas DataFrame, one row at
# a time (the API only exposes a forward cursor).
data_list = []
while (data.error_code == '0') & data.next():
    data_list.append(data.get_row_data())
data_df = pd.DataFrame(data_list, columns=data.fields)

# Baostock returns every field as a string. Convert ALL price/volume columns
# to numeric — the original converted only 'open' and 'close', so the
# high/low filters below compared str to float and silently dropped rows.
for col in ['open', 'high', 'low', 'close', 'volume']:
    data_df[col] = pd.to_numeric(data_df[col])

# Keep sessions that opened within ±2% of the previous close. fillna(0)
# makes the first row (no previous close) fail the upper bound and drop
# out, matching the original behaviour.
prev_close = data_df["close"].shift(1).fillna(0)
data_df = data_df[(data_df["open"] >= 0.98 * prev_close)
                  & (data_df["open"] <= 1.02 * prev_close)]
# Limit-up condition: the bar never traded away from its close.
data_df = data_df[(data_df["high"] == data_df["close"])
                  & (data_df["low"] == data_df["close"])]
# Exclude zero prices.
data_df = data_df[(data_df["open"] != 0) & (data_df["close"] != 0)]

if data_df.empty:
    print("Warning: data_df is empty after filtering. "
          "Check your filtering conditions.")
    # Optionally raise to stop execution:
    # raise ValueError("data_df is empty after filtering.")
else:
    # Hold out 20% of the rows for validation.
    train_data, val_data = train_test_split(
        data_df, test_size=0.2, random_state=42
    )

    # Suppress NeuralProphet's informational logging.
    set_log_level("ERROR")

    # Custom LSTM layer configuration (kept from the original script).
    custom_layer = LSTM(input_size=1, hidden_size=128, num_layers=1,
                        batch_first=True)

    # Standalone NeuralProphet baseline model.
    model_np = NeuralProphet(
        n_forecasts=1,
        n_lags=30,
        n_changepoints=10,
        changepoints_range=0.8,
        learning_rate=1e-3,
        optimizer=Adam,
    )

    # Instantiate the hybrid model.
    model = CustomModel()

    # NOTE(review): BCELoss expects probabilities in [0, 1]; for price
    # forecasting nn.MSELoss is the usual choice — confirm intent.
    criterion = nn.BCELoss()
    # Fixed: the original referenced `optim.Adam` but never imported `optim`
    # (only `from torch.optim import Adam`), which would raise NameError.
    optimizer = Adam(model.parameters(), lr=1e-3)