"""Collect A-share daily bars via Baostock, filter limit-up sessions, and
prepare a NeuralProphet/LSTM hybrid model for price forecasting.

NOTE(review): the original source arrived with all lines collapsed and
truncated; the training-loop body of ``fit`` and the ranking logic of
``predict_stock_codes`` were missing and are marked with TODOs below.
"""

import os

import pandas as pd
import numpy as np
import baostock as bs
import torch
import torch.nn as nn
from torch.nn import LSTM
from torch.optim import Adam
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    mean_absolute_error,
)
from neuralprophet import NeuralProphet, set_log_level


class CustomModel(nn.Module):
    """Hybrid wrapper combining a NeuralProphet forecaster with an LSTM head.

    NOTE(review): NeuralProphet is not an ``nn.Module`` and cannot be called
    on a raw tensor, so ``forward`` as written will fail at runtime; it is
    kept only to preserve the original structure. Prediction should go
    through :meth:`predict`, which uses NeuralProphet's own DataFrame API.
    """

    def __init__(self):
        super().__init__()
        self.neural_prophet = NeuralProphet(
            n_forecasts=1,
            n_lags=30,
            n_changepoints=10,
            changepoints_range=0.8,
            learning_rate=1e-3,
            optimizer=Adam,
        )
        self.lstm = LSTM(input_size=1, hidden_size=128, num_layers=1,
                         batch_first=True)

    def forward(self, x):
        # NOTE(review): broken in the original — NeuralProphet objects are
        # not callable on tensors. Left as-is pending a real integration.
        x = self.neural_prophet(x)
        x = self.lstm(x)
        return x

    def predict(self, df):
        """Forecast one step ahead with the wrapped NeuralProphet model.

        Args:
            df: History DataFrame in NeuralProphet's ``ds``/``y`` format.

        Returns:
            numpy array of one-step-ahead predictions (``yhat1`` column).
        """
        # Build the one-period future frame, then predict on it.
        future = self.neural_prophet.make_future_dataframe(df, periods=1)
        forecast = self.neural_prophet.predict(future)
        return forecast['yhat1'].values


def fit(model, train_data, epochs, batch_size, validation_data):
    """Train ``model`` on ``train_data``, validating on ``validation_data``.

    Args:
        model: The CustomModel instance to train.
        train_data: Training DataFrame.
        epochs: Number of passes over the training data.
        batch_size: Mini-batch size.
        validation_data: Held-out DataFrame for per-epoch validation.

    Raises:
        NotImplementedError: always — see note below.

    NOTE(review): the training-loop body was missing from the (truncated)
    source file; raising here is safer than inventing one silently.
    """
    # TODO: restore the original training loop.
    raise NotImplementedError(
        "Training loop body was missing from the original source."
    )


def predict_stock_codes(data_df):
    """Scale OHLCV features and rank stocks, returning the top-5 candidates.

    Args:
        data_df: DataFrame with 'open', 'high', 'low', 'close', 'volume'
            columns (mutated in place by the scaling step).

    Returns:
        List of the top-5 stock codes. Currently a placeholder — see note.
    """
    feature_cols = ['open', 'high', 'low', 'close', 'volume']
    # Scale the feature columns to [0, 1].
    scaler = MinMaxScaler()
    data_df[feature_cols] = scaler.fit_transform(data_df[feature_cols])
    # NOTE(review): the ranking logic was elided in the source ("#...") and
    # `top_5_stocks` was referenced without being defined (NameError).
    # TODO: restore the elided logic that computes top_5_stocks.
    top_5_stocks = []
    return top_5_stocks


# --- Data collection --------------------------------------------------------
bs.login()

# NOTE(review): "sz.000001" is Ping An Bank on the Shenzhen exchange; the
# Shanghai Composite Index would be "sh.000001". Confirm which was intended.
data = bs.query_history_k_data_plus(
    "sz.000001",
    "date,open,high,low,close,volume",
    start_date="2005-05-30",
    end_date="2024-01-31",
    frequency="d",
)

# Drain the Baostock ResultData cursor into a pandas DataFrame, one row at
# a time (the API only exposes a forward cursor).
data_list = []
while (data.error_code == '0') & data.next():
    data_list.append(data.get_row_data())
data_df = pd.DataFrame(data_list, columns=data.fields)

# Baostock returns every field as a string. Convert ALL price/volume columns
# to numeric — the original converted only 'open' and 'close', so the
# high/low filters below compared str to float and silently dropped rows.
for col in ['open', 'high', 'low', 'close', 'volume']:
    data_df[col] = pd.to_numeric(data_df[col])

# Keep sessions that opened within ±2% of the previous close. fillna(0)
# makes the first row (no previous close) fail the upper bound and drop
# out, matching the original behaviour.
prev_close = data_df["close"].shift(1).fillna(0)
data_df = data_df[(data_df["open"] >= 0.98 * prev_close)
                  & (data_df["open"] <= 1.02 * prev_close)]
# Limit-up condition: the bar never traded away from its close.
data_df = data_df[(data_df["high"] == data_df["close"])
                  & (data_df["low"] == data_df["close"])]
# Exclude zero prices.
data_df = data_df[(data_df["open"] != 0) & (data_df["close"] != 0)]

if data_df.empty:
    print("Warning: data_df is empty after filtering. "
          "Check your filtering conditions.")
    # Optionally raise to stop execution:
    # raise ValueError("data_df is empty after filtering.")
else:
    # Hold out 20% of the rows for validation.
    train_data, val_data = train_test_split(
        data_df, test_size=0.2, random_state=42
    )

    # Suppress NeuralProphet's informational logging.
    set_log_level("ERROR")

    # Custom LSTM layer configuration (kept from the original script).
    custom_layer = LSTM(input_size=1, hidden_size=128, num_layers=1,
                        batch_first=True)

    # Standalone NeuralProphet baseline model.
    model_np = NeuralProphet(
        n_forecasts=1,
        n_lags=30,
        n_changepoints=10,
        changepoints_range=0.8,
        learning_rate=1e-3,
        optimizer=Adam,
    )

    # Instantiate the hybrid model.
    model = CustomModel()

    # NOTE(review): BCELoss expects probabilities in [0, 1]; for price
    # forecasting nn.MSELoss is the usual choice — confirm intent.
    criterion = nn.BCELoss()
    # Fixed: the original referenced `optim.Adam` but never imported `optim`
    # (only `from torch.optim import Adam`), which would raise NameError.
    optimizer = Adam(model.parameters(), lr=1e-3)