Spaces:
Build error
Build error
LSTM error solved
Browse files
app.py
CHANGED
|
@@ -15,11 +15,14 @@ warnings.filterwarnings('ignore')
|
|
| 15 |
try:
|
| 16 |
from statsmodels.tsa.arima.model import ARIMA
|
| 17 |
import tensorflow as tf
|
| 18 |
-
from tensorflow.keras.models import load_model
|
|
|
|
|
|
|
|
|
|
| 19 |
MODELS_AVAILABLE = True
|
| 20 |
-
except ImportError:
|
| 21 |
MODELS_AVAILABLE = False
|
| 22 |
-
st.error("Required libraries not installed. Please install statsmodels and tensorflow.")
|
| 23 |
|
| 24 |
st.set_page_config(
|
| 25 |
page_title="Stock Price Forecasting: ARIMA vs LSTM",
|
|
@@ -127,73 +130,153 @@ if uploaded_file is not None:
|
|
| 127 |
|
| 128 |
if model_choice in ["LSTM", "Both Models"] and MODELS_AVAILABLE:
|
| 129 |
try:
|
| 130 |
-
# LSTM Model (
|
| 131 |
-
st.info("Training LSTM model...")
|
| 132 |
|
| 133 |
-
#
|
| 134 |
-
|
| 135 |
|
| 136 |
# Prepare LSTM data
|
| 137 |
scaler = MinMaxScaler()
|
| 138 |
scaled_data = scaler.fit_transform(ts_data.values.reshape(-1, 1))
|
|
|
|
| 139 |
|
| 140 |
-
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
-
|
| 144 |
-
#
|
| 145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
|
| 158 |
-
# Generate forecast
|
|
|
|
|
|
|
| 159 |
lstm_forecast_scaled = []
|
| 160 |
-
last_value = scaled_data[-1][0]
|
| 161 |
|
| 162 |
-
|
| 163 |
-
last_value = max(0.0, min(1.0, last_value))
|
| 164 |
|
| 165 |
-
for
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
next_val = last_value + trend_effect + random_walk
|
| 171 |
-
|
| 172 |
-
# Keep values within reasonable bounds
|
| 173 |
-
next_val = max(0.0, min(1.0, next_val))
|
| 174 |
-
lstm_forecast_scaled.append([next_val])
|
| 175 |
-
last_value = next_val
|
| 176 |
|
| 177 |
-
lstm_forecast_scaled = np.array(lstm_forecast_scaled)
|
| 178 |
lstm_forecast = scaler.inverse_transform(lstm_forecast_scaled).flatten()
|
| 179 |
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
|
| 188 |
results['LSTM'] = {
|
| 189 |
'forecast': lstm_forecast,
|
| 190 |
-
'
|
|
|
|
|
|
|
| 191 |
}
|
| 192 |
|
| 193 |
except Exception as e:
|
| 194 |
-
st.error(f"LSTM model error: {str(e)}")
|
| 195 |
-
|
| 196 |
-
# Display results
|
| 197 |
if results:
|
| 198 |
# Create forecast dates
|
| 199 |
last_date = stock_data.index[-1]
|
|
|
|
| 15 |
try:
|
| 16 |
from statsmodels.tsa.arima.model import ARIMA
|
| 17 |
import tensorflow as tf
|
| 18 |
+
from tensorflow.keras.models import Sequential, load_model
|
| 19 |
+
from tensorflow.keras.layers import LSTM, Dense, Dropout
|
| 20 |
+
from tensorflow.keras.optimizers import Adam
|
| 21 |
+
from tensorflow.keras.callbacks import EarlyStopping
|
| 22 |
MODELS_AVAILABLE = True
|
| 23 |
+
except ImportError as e:
|
| 24 |
MODELS_AVAILABLE = False
|
| 25 |
+
st.error(f"Required libraries not installed. Please install statsmodels and tensorflow. Error: {e}")
|
| 26 |
|
| 27 |
st.set_page_config(
|
| 28 |
page_title="Stock Price Forecasting: ARIMA vs LSTM",
|
|
|
|
| 130 |
|
| 131 |
if model_choice in ["LSTM", "Both Models"] and MODELS_AVAILABLE:
|
| 132 |
try:
|
| 133 |
+
# LSTM Model (Real Neural Network Implementation)
|
|
|
|
| 134 |
|
| 135 |
+
# Create a cache key for this dataset
|
| 136 |
+
cache_key = f"lstm_model_{selected_company}_{len(ts_data)}"
|
| 137 |
|
| 138 |
# Prepare LSTM data
|
| 139 |
scaler = MinMaxScaler()
|
| 140 |
scaled_data = scaler.fit_transform(ts_data.values.reshape(-1, 1))
|
| 141 |
+
sequence_length = min(60, len(scaled_data) // 4)
|
| 142 |
|
| 143 |
+
if len(scaled_data) <= sequence_length + 10:
|
| 144 |
+
st.warning("Insufficient data for LSTM training. Need at least 70 data points.")
|
| 145 |
+
# Fallback to simple trend method
|
| 146 |
+
last_values = ts_data.tail(10)
|
| 147 |
+
trend = np.polyfit(range(len(last_values)), last_values, 1)[0]
|
| 148 |
+
lstm_forecast = [ts_data.iloc[-1] + trend * i for i in range(1, forecast_days + 1)]
|
| 149 |
+
|
| 150 |
+
results['LSTM (Trend Fallback)'] = {
|
| 151 |
+
'forecast': np.array(lstm_forecast),
|
| 152 |
+
'scaler': None
|
| 153 |
+
}
|
| 154 |
|
| 155 |
+
elif cache_key not in st.session_state:
|
| 156 |
+
# Train new LSTM model
|
| 157 |
+
st.info("Training LSTM model (this may take a minute)...")
|
| 158 |
+
|
| 159 |
+
# Set seeds for reproducibility
|
| 160 |
+
np.random.seed(42)
|
| 161 |
+
tf.random.set_seed(42)
|
| 162 |
+
|
| 163 |
+
def create_sequences(data, seq_length):
    """Slice a series into overlapping input windows and next-step targets.

    Window ``i`` is ``data[i:i + seq_length]`` and its target is
    ``data[i + seq_length]``.

    Args:
        data: Array-like indexed along its first axis (time). Any trailing
            axes are carried through unchanged.
        seq_length: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(n, seq_length, *data.shape[1:])`` and ``y`` has shape
        ``(n, *data.shape[1:])``, where ``n = max(len(data) - seq_length, 0)``.
        When there are too few rows for a single window, both arrays are
        empty but keep that rank, so downstream shape checks still see the
        expected number of dimensions.

    Raises:
        ValueError: If ``seq_length`` is negative.
    """
    if seq_length < 0:
        raise ValueError("seq_length must be non-negative")

    data = np.asarray(data)
    n_windows = max(len(data) - seq_length, 0)

    # Row i of window_idx holds the indices i, i+1, ..., i+seq_length-1, so a
    # single fancy-indexing step gathers every window. With zero windows the
    # index array has shape (0, seq_length), which keeps X's rank intact.
    window_idx = np.arange(n_windows)[:, None] + np.arange(seq_length)
    X = data[window_idx]

    # Targets are the rows right after each window; copy so the result does
    # not alias the caller's array.
    y = data[seq_length:].copy()
    return X, y
|
| 169 |
+
|
| 170 |
+
# Create training sequences
|
| 171 |
+
X, y = create_sequences(scaled_data, sequence_length)
|
| 172 |
+
|
| 173 |
+
# Split data for training (use 80% for training)
|
| 174 |
+
train_size = int(len(X) * 0.8)
|
| 175 |
+
X_train, X_test = X[:train_size], X[train_size:]
|
| 176 |
+
y_train, y_test = y[:train_size], y[train_size:]
|
| 177 |
+
|
| 178 |
+
# Build LSTM model
|
| 179 |
+
model = Sequential([
|
| 180 |
+
LSTM(50, return_sequences=True, input_shape=(sequence_length, 1)),
|
| 181 |
+
Dropout(0.2),
|
| 182 |
+
LSTM(50, return_sequences=False),
|
| 183 |
+
Dropout(0.2),
|
| 184 |
+
Dense(25),
|
| 185 |
+
Dense(1)
|
| 186 |
+
])
|
| 187 |
|
| 188 |
+
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
|
| 189 |
+
|
| 190 |
+
# Training with early stopping
|
| 191 |
+
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
|
| 192 |
+
|
| 193 |
+
# Train the model
|
| 194 |
+
with st.spinner("Training LSTM neural network..."):
|
| 195 |
+
history = model.fit(
|
| 196 |
+
X_train, y_train,
|
| 197 |
+
batch_size=32,
|
| 198 |
+
epochs=50,
|
| 199 |
+
validation_data=(X_test, y_test),
|
| 200 |
+
callbacks=[early_stopping],
|
| 201 |
+
verbose=0
|
| 202 |
+
)
|
| 203 |
+
|
| 204 |
+
# Calculate model performance on test set
|
| 205 |
+
test_predictions = model.predict(X_test, verbose=0)
|
| 206 |
+
test_predictions = scaler.inverse_transform(test_predictions)
|
| 207 |
+
y_test_actual = scaler.inverse_transform(y_test)
|
| 208 |
+
|
| 209 |
+
lstm_test_rmse = np.sqrt(mean_squared_error(y_test_actual, test_predictions))
|
| 210 |
+
st.success(f"LSTM Training Complete - Test RMSE: ${lstm_test_rmse:.2f}")
|
| 211 |
+
|
| 212 |
+
# Cache the trained model and related data
|
| 213 |
+
st.session_state[cache_key] = {
|
| 214 |
+
'model': model,
|
| 215 |
+
'scaler': scaler,
|
| 216 |
+
'test_rmse': lstm_test_rmse,
|
| 217 |
+
'sequence_length': sequence_length
|
| 218 |
+
}
|
| 219 |
|
| 220 |
+
# Generate forecast
|
| 221 |
+
st.info("Generating LSTM predictions...")
|
| 222 |
+
last_sequence = scaled_data[-sequence_length:].reshape(1, sequence_length, 1)
|
| 223 |
lstm_forecast_scaled = []
|
|
|
|
| 224 |
|
| 225 |
+
current_sequence = last_sequence.copy()
|
|
|
|
| 226 |
|
| 227 |
+
for _ in range(forecast_days):
|
| 228 |
+
next_pred = model.predict(current_sequence, verbose=0)[0, 0]
|
| 229 |
+
lstm_forecast_scaled.append(next_pred)
|
| 230 |
+
current_sequence = np.roll(current_sequence, -1, axis=1)
|
| 231 |
+
current_sequence[0, -1, 0] = next_pred
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
|
| 233 |
+
lstm_forecast_scaled = np.array(lstm_forecast_scaled).reshape(-1, 1)
|
| 234 |
lstm_forecast = scaler.inverse_transform(lstm_forecast_scaled).flatten()
|
| 235 |
|
| 236 |
+
results['LSTM'] = {
|
| 237 |
+
'forecast': lstm_forecast,
|
| 238 |
+
'model': model,
|
| 239 |
+
'scaler': scaler,
|
| 240 |
+
'test_rmse': lstm_test_rmse
|
| 241 |
+
}
|
| 242 |
+
|
| 243 |
+
else:
|
| 244 |
+
# Use cached model
|
| 245 |
+
st.info("Using cached LSTM model...")
|
| 246 |
+
cached_data = st.session_state[cache_key]
|
| 247 |
+
model = cached_data['model']
|
| 248 |
+
scaler = cached_data['scaler']
|
| 249 |
+
lstm_test_rmse = cached_data['test_rmse']
|
| 250 |
+
sequence_length = cached_data['sequence_length']
|
| 251 |
+
|
| 252 |
+
# Prepare data for cached model
|
| 253 |
+
scaled_data = scaler.transform(ts_data.values.reshape(-1, 1))
|
| 254 |
+
|
| 255 |
+
# Generate forecast with cached model
|
| 256 |
+
st.info("Generating LSTM predictions...")
|
| 257 |
+
last_sequence = scaled_data[-sequence_length:].reshape(1, sequence_length, 1)
|
| 258 |
+
lstm_forecast_scaled = []
|
| 259 |
+
|
| 260 |
+
current_sequence = last_sequence.copy()
|
| 261 |
+
|
| 262 |
+
for _ in range(forecast_days):
|
| 263 |
+
next_pred = model.predict(current_sequence, verbose=0)[0, 0]
|
| 264 |
+
lstm_forecast_scaled.append(next_pred)
|
| 265 |
+
current_sequence = np.roll(current_sequence, -1, axis=1)
|
| 266 |
+
current_sequence[0, -1, 0] = next_pred
|
| 267 |
+
|
| 268 |
+
lstm_forecast_scaled = np.array(lstm_forecast_scaled).reshape(-1, 1)
|
| 269 |
+
lstm_forecast = scaler.inverse_transform(lstm_forecast_scaled).flatten()
|
| 270 |
|
| 271 |
results['LSTM'] = {
|
| 272 |
'forecast': lstm_forecast,
|
| 273 |
+
'model': model,
|
| 274 |
+
'scaler': scaler,
|
| 275 |
+
'test_rmse': lstm_test_rmse
|
| 276 |
}
|
| 277 |
|
| 278 |
except Exception as e:
|
| 279 |
+
st.error(f"LSTM model error: {str(e)}") # Display results
|
|
|
|
|
|
|
| 280 |
if results:
|
| 281 |
# Create forecast dates
|
| 282 |
last_date = stock_data.index[-1]
|