# DeepFin / deep_mql_model.py
# Author: Amós e Souza Fernandes
# (repository metadata: "Upload 120 files", commit 5f10e37 verified)
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
# (Potentially add more imports for other model types or libraries later)
def preprocess_data_for_deep_mql(df: pd.DataFrame, look_back: int = 60, features_cols=None, target_col='Close'):
    """
    Prepare data for a deep learning model (e.g., LSTM) for MQL-like tasks.

    Adds simple engineered features (pct-change returns, 10- and 30-period
    moving averages of 'Close'), min-max scales all selected columns, and
    slices the scaled matrix into overlapping sequences of length
    ``look_back`` for time-series forecasting.

    Parameters
    ----------
    df : pd.DataFrame
        OHLCV-style data. Must contain a 'Close' column plus every column
        named in ``features_cols`` and ``target_col``.
    look_back : int
        Number of past time steps in each input sequence.
    features_cols : list[str] | None
        Feature columns to include; defaults to ['Close', 'Volume'].
        (None default avoids the mutable-default-argument pitfall.)
    target_col : str
        Column whose next-step scaled value becomes the target ``y``.

    Returns
    -------
    tuple
        (X, y, scaler, columns): X has shape (n_samples, look_back,
        n_features); y holds the scaled target for the step following each
        sequence; ``scaler`` is the fitted MinMaxScaler; ``columns`` is the
        ordered list of column names the scaler was fitted on.
    """
    if features_cols is None:
        features_cols = ['Close', 'Volume']
    df_copy = df.copy()

    # Feature engineering (can be expanded).
    df_copy['returns'] = df_copy['Close'].pct_change()
    df_copy['ma10'] = df_copy['Close'].rolling(window=10).mean()
    df_copy['ma30'] = df_copy['Close'].rolling(window=30).mean()
    df_copy = df_copy.dropna()

    # Deduplicate while PRESERVING order: a plain set() would make the
    # column order — and hence the scaler's feature layout — nondeterministic
    # across runs (string hash randomization).
    all_cols = list(dict.fromkeys(list(features_cols) + [target_col, 'returns', 'ma10', 'ma30']))

    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(df_copy[all_cols].values)

    # With a deterministic column list, the target's position in the scaled
    # matrix is simply its index in that list — no fallback guessing needed.
    target_idx_in_scaled = all_cols.index(target_col)

    X, y = [], []
    for i in range(look_back, len(scaled_data)):
        X.append(scaled_data[i - look_back:i])
        y.append(scaled_data[i, target_idx_in_scaled])  # next-step target value
    return np.array(X), np.array(y), scaler, all_cols
def create_deep_mql_model(input_shape):
    """
    Build a basic stacked-LSTM regression model for MQL-like tasks.

    Parameters
    ----------
    input_shape : tuple
        (look_back, n_features) shape of each input sequence.

    Returns
    -------
    A compiled Keras ``Sequential`` model (Adam optimizer, MSE loss) whose
    single linear output unit represents e.g. a predicted price or a value
    from which a trading signal can be derived.
    """
    model = Sequential()
    model.add(LSTM(units=50, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=25, activation='relu'))
    model.add(Dense(units=1))  # single regression output
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model
def generate_trading_signals(model, data_X, scaler, last_known_price, threshold=0.005, target_column_index=0):
    """
    Generate a trading signal from a model's next-step prediction.

    Predicts the next (scaled) target value, inverse-transforms it back to
    price space, and compares it against ``last_known_price``.

    Parameters
    ----------
    model
        Trained model exposing ``predict(X)`` that returns the scaled target.
    data_X : np.ndarray
        Input sequence(s): either (look_back, n_features) for a single
        sequence or (batch, look_back, n_features).
    scaler
        The fitted scaler used during preprocessing; must expose
        ``inverse_transform`` over the same feature layout.
    last_known_price : float
        Unscaled reference price (close of the last day in the sequence).
    threshold : float
        Relative move required to trigger a buy/sell instead of hold.
    target_column_index : int
        Position of the target column in the scaler's feature layout.
        Defaults to 0 for backward compatibility; pass the index returned
        by the preprocessing step for correct inverse scaling.

    Returns
    -------
    tuple
        (signal, predicted_price) with signal 1 (buy), -1 (sell) or
        0 (hold). On an inverse-transform failure, (0, None) is returned so
        callers can always unpack two values.
    """
    # Model expects a batch dimension: promote a single (look_back, n_features)
    # sequence to (1, look_back, n_features).
    if data_X.ndim == 2:
        data_X = np.expand_dims(data_X, axis=0)
    predicted_scaled_value = model.predict(data_X)

    # The scaler was fitted on the full feature matrix, so to invert a single
    # column we embed the prediction into a zero-filled array of the same
    # width at the target column's position.
    num_features = data_X.shape[2]
    dummy_array = np.zeros((len(predicted_scaled_value), num_features))
    dummy_array[:, target_column_index] = predicted_scaled_value.ravel()
    try:
        predicted_value = scaler.inverse_transform(dummy_array)[:, target_column_index]
    except ValueError as e:
        print(f"Error during inverse_transform: {e}")
        print("Ensure dummy_array shape matches scaler's n_features_in_.")
        print(f"dummy_array shape: {dummy_array.shape}, scaler.n_features_in_: {scaler.n_features_in_}")
        # Keep the return shape consistent with the success path so callers
        # that unpack (signal, price) do not crash on the error path.
        return 0, None

    predicted_price = float(predicted_value[0])
    signal = 0
    if predicted_price > last_known_price * (1 + threshold):
        signal = 1   # Buy
    elif predicted_price < last_known_price * (1 - threshold):
        signal = -1  # Sell
    return signal, predicted_price
if __name__ == '__main__':
    # Example usage. In a real pipeline the data would come from e.g.
    # agents.financial_data_agent.fetch_historical_ohlcv("MSFT", period="2y",
    # interval="1d"). For demonstration, build a dummy OHLCV DataFrame with a
    # sine-wave trend added to 'Close'.
    dates_mql = pd.date_range(start='2022-01-01', periods=500, freq='B')
    data_mql_np = np.random.rand(500, 5) * 150 + 50
    raw_data_mql = pd.DataFrame(data_mql_np, index=dates_mql, columns=['Open', 'High', 'Low', 'Close', 'Volume'])
    raw_data_mql['Close'] = raw_data_mql['Close'] + np.sin(np.linspace(0, 20, 500)) * 20  # Add some trend
    if not raw_data_mql.empty:
        look_back_mql = 60
        # Use the full OHLCV set as features for DeepMQL.
        features_mql = ['Close', 'Volume', 'Open', 'High', 'Low']
        target_mql = 'Close'
        X_mql, y_mql, scaler_mql, scaled_cols_mql = preprocess_data_for_deep_mql(
            raw_data_mql,
            look_back=look_back_mql,
            features_cols=features_mql,
            target_col=target_mql
        )
        if X_mql.shape[0] > 0:
            print(f"X_mql shape: {X_mql.shape}, y_mql shape: {y_mql.shape}")
            # 1. Create and train the model on the first 80% of sequences.
            model_mql = create_deep_mql_model(input_shape=(X_mql.shape[1], X_mql.shape[2]))
            print("Training DeepMQL model (example)...")
            train_size = int(len(X_mql) * 0.8)
            X_train_mql, y_train_mql = X_mql[:train_size], y_mql[:train_size]
            if y_train_mql.ndim == 1:
                # Keras expects a 2-D target array.
                y_train_mql = y_train_mql.reshape(-1, 1)
            model_mql.fit(X_train_mql, y_train_mql, epochs=10, batch_size=32, verbose=1)  # few epochs for demo
            print("DeepMQL model trained.")
            # 2. Generate a signal from the last available sequence.
            if len(X_mql) > train_size:
                last_sequence = X_mql[-1]  # in production this would be the latest live window
                # NOTE(review): strictly, the reference price should be the
                # 'Close' of the last day *inside* last_sequence (i.e. one row
                # before the final target). Using the final raw 'Close' is a
                # simplification acceptable for this demo — confirm alignment
                # before using in a real strategy.
                last_actual_close_price = raw_data_mql[target_mql].iloc[-1]
                print(f"\nGenerating signal based on last sequence (shape: {last_sequence.shape})...")
                print(f"Last actual close price for reference: {last_actual_close_price}")
                signal, predicted_price = generate_trading_signals(
                    model_mql,
                    last_sequence,
                    scaler_mql,  # Pass the scaler used for X_mql
                    last_actual_close_price,
                    threshold=0.001  # Smaller threshold for demo
                )
                print(f"Predicted Next '{target_mql}': {predicted_price:.2f}")
                if signal == 1:
                    print("Signal: BUY")
                elif signal == -1:
                    print("Signal: SELL")
                else:
                    print("Signal: HOLD")
            else:
                print("Not enough data to generate a signal after training split.")
        else:
            print("X_mql is empty. Check preprocessing for DeepMQL.")
    else:
        print("Raw data for MQL is empty. Check data fetching.")