# code_predictor / app2.py
# Source: Hugging Face Space file uploaded by user "astacn"
# (commit 0b0dde6 "Update app2.py", 4.36 kB).
# NOTE: the original scrape carried the hosting site's UI text
# ("raw history blame", etc.) above the imports; it is preserved here
# as a comment header so the module remains valid Python.
import pandas as pd
import numpy as np
import baostock as bs
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_absolute_error
from neuralprophet import NeuralProphet, set_log_level
from torch.optim import Adam
from torch.nn import LSTM
import torch
import torch.nn as nn
import os
# --- Data collection (Baostock) ---
# Baostock is a free Chinese A-share data API; login() opens an anonymous
# session (no credentials required).
bs.login()

# Fetch daily OHLCV bars for code "sz.000001".
# NOTE(review): the original comment called this the "Shanghai Composite
# Index", but the "sz." prefix is a Shenzhen-exchange code — confirm intent.
data = bs.query_history_k_data_plus(
    "sz.000001",
    "date,open,high,low,close,volume",
    start_date="2005-05-30",
    end_date="2024-01-31",
    frequency="d"
)

# Drain the Baostock ResultData cursor row by row into a DataFrame.
data_list = []
while (data.error_code == '0') & data.next():
    # Fetch one record and accumulate it.
    data_list.append(data.get_row_data())
data_df = pd.DataFrame(data_list, columns=data.fields)

# Baostock returns every field as a string. Convert ALL price/volume columns
# to numeric — the original converted only 'open' and 'close', so the
# high/low equality filter below compared string columns against a float
# Series and silently matched nothing (always-empty DataFrame).
for col in ("open", "high", "low", "close", "volume"):
    data_df[col] = pd.to_numeric(data_df[col], errors="coerce")

# Keep rows whose open is within +/-2% of the previous close. The notna()
# guard explicitly drops the first row (no previous close) instead of
# comparing against a fabricated 0 as the old fillna(0) version did.
prev_close = data_df["close"].shift(1)
data_df = data_df[
    prev_close.notna()
    & (data_df["open"] >= 0.98 * prev_close)
    & (data_df["open"] <= 1.02 * prev_close)
]
# One-price ("limit-up") day: high == low == close.
data_df = data_df[(data_df["high"] == data_df["close"]) & (data_df["low"] == data_df["close"])]
# Exclude zero prices (bad or missing quotes).
data_df = data_df[(data_df["open"] != 0) & (data_df["close"] != 0)]

# Guard: the filters above are strict and can leave nothing to train on.
if data_df.empty:
    print("Warning: data_df is empty after filtering. Check your filtering conditions.")
    # Optionally, raise an exception to stop execution:
    # raise ValueError("data_df is empty after filtering.")
else:
    # 80/20 train/validation split, seeded for reproducibility.
    train_data, val_data = train_test_split(data_df, test_size=0.2, random_state=42)
# Silence NeuralProphet's informational logging; only errors are shown.
set_log_level("ERROR")

# A standalone single-layer LSTM over scalar inputs (batch_first keeps
# tensors shaped (batch, seq, feature)).
# NOTE(review): this module-level instance is never used in the visible
# code — CustomModel below builds its own LSTM. Candidate for removal.
custom_layer = LSTM(input_size=1, hidden_size=128, num_layers=1, batch_first=True)

# Baseline NeuralProphet forecaster:
#   n_forecasts=1   -> predict one step ahead
#   n_lags=30       -> autoregress over the previous 30 observations
#   n_changepoints / changepoints_range -> trend flexibility
# NOTE(review): `optimizer` is passed the torch Adam *class*; confirm the
# installed NeuralProphet version accepts a torch optimizer type here
# (some versions expect the string "AdamW" or "SGD").
model_np = NeuralProphet(
    n_forecasts=1,
    n_lags=30,
    n_changepoints=10,
    changepoints_range=0.8,
    learning_rate=1e-3,
    optimizer=Adam,
)
# Create a custom model by combining NeuralProphet with PyTorch's LSTM
class CustomModel(nn.Module):
    """Experimental hybrid of a NeuralProphet forecaster and a PyTorch LSTM.

    NOTE(review): `forward` calls the NeuralProphet instance as if it were
    an nn.Module (`self.neural_prophet(x)`), but NeuralProphet exposes a
    DataFrame-based fit/predict API rather than a callable-module API, so
    that path looks like it would fail at runtime — confirm against the
    installed neuralprophet version. Only `predict` uses the documented API.
    """

    def __init__(self):
        super(CustomModel, self).__init__()
        # Inner NeuralProphet configured identically to the module-level
        # `model_np`: 1-step-ahead forecasts from 30 lagged observations.
        self.neural_prophet = NeuralProphet(
            n_forecasts=1,
            n_lags=30,
            n_changepoints=10,
            changepoints_range=0.8,
            learning_rate=1e-3,
            optimizer=Adam,
        )
        # Single-layer LSTM over scalar inputs; batch_first keeps tensors
        # shaped (batch, seq, feature).
        self.lstm = LSTM(input_size=1, hidden_size=128, num_layers=1, batch_first=True)

    def forward(self, x):
        # NOTE(review): see class docstring — in addition to the callable
        # issue, nn.LSTM returns an (output, (h_n, c_n)) tuple, so the value
        # returned here would not be a plain tensor. Unverified code path.
        x = self.neural_prophet(x)
        x = self.lstm(x)
        return x

    def predict(self, df):
        """Forecast one step ahead with the inner NeuralProphet model.

        Args:
            df: Input history as a DataFrame in NeuralProphet's expected
                format (presumably 'ds'/'y' columns — verify against caller).

        Returns:
            numpy array of the 'yhat1' (1-step-ahead) prediction column.
        """
        # Extend the frame one period past the end of `df`, then predict.
        future = self.neural_prophet.make_future_dataframe(df, periods=1)
        forecast = self.neural_prophet.predict(future)
        return forecast['yhat1'].values
# --- Model, loss, optimizer ---
model = CustomModel()
# BCELoss expects probabilities in [0, 1]; the model would need a sigmoid
# output head for this loss to be valid — NOTE(review): confirm intent.
criterion = nn.BCELoss()
# Bug fix: the original called `optim.Adam(...)`, but only
# `from torch.optim import Adam` was imported, so `optim` was an undefined
# name (NameError at import time). Use the imported class directly.
optimizer = Adam(model.parameters(), lr=1e-3)
# Training loop
def fit(model, train_data, epochs, batch_size, validation_data):
    """Custom training loop for CustomModel (placeholder).

    NOTE(review): the original source was truncated mid-docstring here,
    which left the file with an unterminated string literal (SyntaxError).
    This stub restores a parseable module; the actual loop (batching,
    forward/backward passes, per-epoch validation) still needs writing.

    Args:
        model: The CustomModel instance to train.
        train_data: Training DataFrame.
        epochs: Number of passes over the training data.
        batch_size: Mini-batch size.
        validation_data: Held-out DataFrame for per-epoch evaluation.

    Raises:
        NotImplementedError: always — the training loop is not implemented.
    """
    raise NotImplementedError(
        "Training loop was truncated in the original source; implement before use."
    )
def predict_stock_codes(data_df):
    """Scale OHLCV features and (eventually) rank the top-5 stock codes.

    Args:
        data_df: DataFrame with numeric 'open', 'high', 'low', 'close',
            and 'volume' columns. The input is NOT modified (the original
            scaled the caller's DataFrame in place).

    Returns:
        The five top-ranked stock codes (selection logic not yet written).

    Raises:
        NotImplementedError: the ranking step was elided in the original
            source ("#... rest of the code"), which referenced an undefined
            `top_5_stocks` and would have crashed with NameError.
    """
    cols = ['open', 'high', 'low', 'close', 'volume']
    # Work on a copy so the caller's DataFrame is left untouched.
    scaled = data_df.copy()
    # Column-wise min-max scaling to [0, 1] — equivalent to sklearn's
    # MinMaxScaler with the default feature_range. A zero range (constant
    # column) maps to 0 instead of dividing by zero, matching MinMaxScaler's
    # handling of zero data ranges.
    col_min = scaled[cols].min()
    col_rng = (scaled[cols].max() - col_min).replace(0, 1.0)
    scaled[cols] = (scaled[cols] - col_min) / col_rng
    # TODO: implement the actual prediction/ranking over `scaled`.
    raise NotImplementedError(
        "Stock ranking logic was elided in the original source; implement before use."
    )