astacn commited on
Commit
6e106ca
·
verified ·
1 Parent(s): 2bbc031

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +173 -0
app.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import akshare as ak
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.preprocessing import StandardScaler
5
+ from sklearn.model_selection import train_test_split, KFold, cross_val_score, GridSearchCV
6
+ from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
7
+ import tensorflow as tf
8
+ from tensorflow.keras.models import Sequential
9
+ from tensorflow.keras.layers import LSTM, Dense, Dropout
10
+ from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
11
+ from tensorflow.keras.optimizers import Adam
12
+ import datetime
13
+ import matplotlib.pyplot as plt
14
+ import requests
15
+ import time
16
+ import gradio as gr
17
+ import schedule
18
+ import threading
19
+
20
+ # Step 1: Data Acquisition and Preprocessing
21
+
22
def get_realtime_stock_data(symbol):
    """Fetch a real-time quote snapshot for one A-share symbol via AKShare.

    Returns a single-row DataFrame with English column names, indexed by the
    current timestamp. NOTE(review): assumes the spot feed exposes all the
    Chinese columns listed below — confirm against the installed akshare
    version, as spot-table schemas have changed across releases.
    """
    snapshot = ak.stock_zh_a_spot()
    quote = snapshot[snapshot['代码'] == symbol]
    quote = quote[['代码', '最新价', '开盘', '最高', '最低', '成交量', '换手率', '所属板块', '股票名称', '控股股东']]
    quote.columns = ["Symbol", "Close", "Open", "High", "Low", "Volume",
                     "Turnover", "Sector", "Stock_Name", "Major_Shareholder"]
    quote['Date'] = pd.to_datetime(datetime.datetime.now())
    return quote.set_index('Date')
31
+
32
def get_stock_data(start_date, end_date):
    """Download qfq-adjusted daily OHLCV bars for sh600000 via AKShare.

    Returns a DataFrame indexed by trade date with English column names.
    """
    raw = ak.stock_zh_a_hist(symbol="sh600000", period="daily",
                             start_date=start_date, end_date=end_date,
                             adjust="qfq")
    bars = raw[['日期', '开盘', '收盘', '最高', '最低', '成交量', '换手率']]
    bars.columns = ["Date", "Open", "Close", "High", "Low", "Volume", "Turnover"]
    bars['Date'] = pd.to_datetime(bars['Date'])
    return bars.set_index('Date')
39
+
40
def get_auction_data(date):
    """Summarise call-auction ticks (09:15–09:25) for sh600000 on `date`.

    Returns a plain dict: {'price': mean auction price,
    'volume': total auction volume} — both scalars.
    """
    ticks = ak.stock_zh_a_tick_tx(symbol="sh600000",
                                  trade_date=date.strftime("%Y%m%d"))
    # Keep only the pre-open auction window (string comparison on HH:MM:SS).
    in_window = (ticks['time'] >= '09:15:00') & (ticks['time'] <= '09:25:00')
    ticks = ticks[in_window]
    ticks['price'] = ticks['price'].astype(float)
    ticks['volume'] = ticks['volume'].astype(float)
    summary = ticks[['price', 'volume']].agg({'price': 'mean', 'volume': 'sum'})
    return summary.to_dict()
48
+
49
def get_sentiment_data(start_date, end_date):
    """Build a daily sentiment frame for the inclusive range [start_date, end_date].

    Placeholder implementation: each day receives one uniform random score in
    [-1, 1) until a real sentiment API is wired in. Returns a DataFrame with
    a 'Sentiment' column indexed by date.
    """
    rows = [
        {'Date': day, 'Sentiment': np.random.uniform(-1, 1)}  # one draw per day, in order
        for day in pd.date_range(start=start_date, end=end_date)
    ]
    return pd.DataFrame(rows).set_index('Date')
60
+
61
def get_popularity_data():
    """Fetch northbound (沪股通) net-buy flow via AKShare as a popularity proxy.

    Returns a DataFrame with a single 'Popularity' column indexed by date.
    NOTE(review): relies on `stock_em_hsgt_stock_statistics` and its Chinese
    column names — verify against the installed akshare version.
    """
    flow = ak.stock_em_hsgt_stock_statistics(symbol='沪股通')
    flow = flow[['日期', '北向资金净买额']]
    flow.columns = ['Date', 'Popularity']
    flow['Date'] = pd.to_datetime(flow['Date'])
    return flow.set_index('Date')
69
+
70
def get_valuation_data():
    """Fetch PE (TTM) and PB valuation series for sh600000 via AKShare.

    Returns a DataFrame with 'PE_Ratio' and 'PB_Ratio' columns indexed by date.
    """
    metrics = ak.stock_a_lg_indicator(symbol="sh600000")
    metrics = metrics[['日期', '市盈率TTM', '市净率']]
    metrics.columns = ['Date', 'PE_Ratio', 'PB_Ratio']
    metrics['Date'] = pd.to_datetime(metrics['Date'])
    return metrics.set_index('Date')
78
+
79
def preprocess_data(data, sentiment_data, popularity_data, valuation_data, auction_data):
    """Merge auxiliary features into the OHLCV frame, add indicators, and scale.

    Parameters
    ----------
    data : pd.DataFrame
        Date-indexed frame with Open/Close/High/Low/Volume/Turnover columns.
    sentiment_data, popularity_data, valuation_data : pd.DataFrame
        Date-indexed frames providing 'Sentiment', 'Popularity' and
        'PE_Ratio'/'PB_Ratio' columns respectively.
    auction_data : dict
        Scalar summary from get_auction_data: {'price': ..., 'volume': ...}.

    Returns
    -------
    tuple
        (scaled_data, index): standardized feature matrix (numpy array) and
        the DatetimeIndex of the rows that survived filtering.
    """
    # Join daily auxiliary series; missing sentiment/popularity become neutral 0,
    # missing valuation ratios fall back to the column mean.
    data = data.join(sentiment_data, how='left')
    data['Sentiment'] = data['Sentiment'].fillna(0)
    data = data.join(popularity_data, how='left')
    data['Popularity'] = data['Popularity'].fillna(0)
    data = data.join(valuation_data, how='left')
    data['PE_Ratio'] = data['PE_Ratio'].fillna(data['PE_Ratio'].mean())
    data['PB_Ratio'] = data['PB_Ratio'].fillna(data['PB_Ratio'].mean())

    # BUG FIX: pd.DataFrame(auction_data).T raised
    # "ValueError: If using all scalar values, you must pass an index" because
    # get_auction_data returns a dict of scalars. Wrapping the dict in a list
    # builds the intended one-row frame.
    auction_df = pd.DataFrame([auction_data])
    auction_df = auction_df.rename(columns={'price': 'Auction_Price',
                                            'volume': 'Auction_Volume'})
    data = data.join(auction_df, how='left')
    # Auction columns rarely align with the date index, so backfill sensibly:
    # missing auction price -> that day's open; missing auction volume -> 0.
    data['Auction_Price'] = data['Auction_Price'].fillna(data['Open'])
    data['Auction_Volume'] = data['Auction_Volume'].fillna(0)

    # Technical indicators: moving averages, MACD + signal line, volume change.
    data['MA5'] = data['Close'].rolling(window=5).mean()
    data['MA15'] = data['Close'].rolling(window=15).mean()
    data['MACD'] = (data['Close'].ewm(span=12, adjust=False).mean()
                    - data['Close'].ewm(span=26, adjust=False).mean())
    data['Signal'] = data['MACD'].ewm(span=9, adjust=False).mean()
    data['Volume_Change'] = data['Volume'].pct_change()

    # Keep only rows whose open is within ±3% of the previous close.
    data['Prev_Close'] = data['Close'].shift(1)
    within_band = ((data['Open'] <= data['Prev_Close'] * 1.03)
                   & (data['Open'] >= data['Prev_Close'] * 0.97))
    # dropna on the filtered copy (not inplace on a slice) avoids pandas
    # chained-assignment warnings/undefined behavior.
    data = data[within_band].dropna()

    # Standardize all features. NOTE(review): the scaler is local here, but
    # the Gradio `predict` path references a module-level `scaler` — the
    # fitted scaler should be persisted/shared for inference; confirm wiring.
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(data)
    return scaled_data, data.index
108
+
109
+ # Step 2: LSTM Model Definition with Hyperparameter Tuning
110
+
111
def create_lstm_model(learning_rate=0.001, lstm_units=50, dropout_rate=0.2,
                      input_shape=(60, 19)):
    """Build and compile a two-layer LSTM binary classifier.

    Parameters
    ----------
    learning_rate : float
        Adam optimizer learning rate.
    lstm_units : int
        Units in each of the two LSTM layers.
    dropout_rate : float
        Dropout applied after each LSTM layer.
    input_shape : tuple
        (timesteps, features). Generalized from the previously hard-coded
        (60, 19) — 60-step lookback window over 19 engineered features — kept
        as the default for backward compatibility.

    Returns
    -------
    A compiled Keras Sequential model with a sigmoid output and
    binary cross-entropy loss.
    """
    model = Sequential()
    model.add(LSTM(lstm_units, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(dropout_rate))
    model.add(LSTM(lstm_units, return_sequences=False))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model
121
+
122
+ # Step 3: Cross-Validation and Hyperparameter Tuning
123
+
124
def cross_validate_model(X, y):
    """Print 5-fold cross-validated accuracy for the default LSTM configuration.

    NOTE(review): KerasClassifier comes from
    tensorflow.keras.wrappers.scikit_learn, which has been removed from modern
    TensorFlow releases — migrating to the `scikeras` package is needed for
    current TF versions; confirm against the pinned TF version.
    """
    estimator = KerasClassifier(build_fn=create_lstm_model,
                                epochs=50, batch_size=32, verbose=0)
    splitter = KFold(n_splits=5, shuffle=True, random_state=42)
    results = cross_val_score(estimator, X, y, cv=splitter)
    print(f"Cross-validation accuracy: {results.mean():.2f} (+/- {results.std():.2f})")
132
+
133
+ # Step 4: Model Optimization using Grid Search
134
+
135
def optimize_model(X, y):
    """Grid-search LSTM hyper-parameters with 3-fold CV and return the best set.

    Prints the best score/params and returns the best parameter dict for use
    by train_and_evaluate.
    """
    estimator = KerasClassifier(build_fn=create_lstm_model, verbose=0)
    search_space = {
        'epochs': [50, 100],
        'batch_size': [32, 64],
        'learning_rate': [0.001, 0.005, 0.01],
        'lstm_units': [50, 100],
        'dropout_rate': [0.2, 0.3],
    }
    search = GridSearchCV(estimator=estimator, param_grid=search_space,
                          n_jobs=-1, cv=3)
    fitted = search.fit(X, y)
    print(f"Best: {fitted.best_score_} using {fitted.best_params_}")
    return fitted.best_params_
148
+
149
+ # Step 5: Training, Evaluation, and Backtesting
150
+
151
def train_and_evaluate(X_train, y_train, X_test, y_test, best_params):
    """Train the final LSTM with tuned hyper-parameters and return test accuracy.

    `best_params` is the dict produced by optimize_model (learning_rate,
    lstm_units, dropout_rate, epochs, batch_size). 20% of the training data is
    held out for validation during fitting.
    """
    model = create_lstm_model(
        learning_rate=best_params['learning_rate'],
        lstm_units=best_params['lstm_units'],
        dropout_rate=best_params['dropout_rate'],
    )
    model.fit(X_train, y_train,
              epochs=best_params['epochs'],
              batch_size=best_params['batch_size'],
              validation_split=0.2)
    # Threshold sigmoid outputs at 0.5 to obtain hard 0/1 labels.
    raw_scores = model.predict(X_test)
    labels = [1 if score >= 0.5 else 0 for score in raw_scores]
    return accuracy_score(y_test, labels)
164
+
165
+ # Step 6: Deploy using Gradio
166
+
167
+ def predict(input_data):
168
+ # Assuming the input is preprocessed in the same way
169
+ scaled_input = scaler.transform(input_data)
170
+ prediction = final_model.predict(np.array([scaled_input]))
171
+ result = "Limit-Up" if prediction[0] >= 0.5 else "Not Limit-Up"
172
+ # Additional information to display
173
+ stock_info = input_data.iloc[0][['Sector', 'Symbol', 'Stock_Name', 'Major_Share