File size: 12,754 Bytes
5f10e37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
# (Potentially add more imports for other model types or libraries later)

def preprocess_data_for_deep_mql(df: pd.DataFrame, look_back: int = 60, features_cols=['Close', 'Volume'], target_col='Close'):
    """
    Prepare data for a deep learning model (e.g., LSTM) for MQL-like tasks.

    - Adds simple engineered features (returns, 10/30-period moving averages).
    - Scales all selected columns to [0, 1] with a MinMaxScaler.
    - Slices the scaled matrix into overlapping look-back sequences.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain a 'Close' column plus every column named in
        `features_cols` and `target_col`.
    look_back : int
        Number of timesteps per input sequence.
    features_cols : list of str
        Input feature columns (read-only; the default list is never mutated).
    target_col : str
        Column whose next-step scaled value becomes the prediction target.

    Returns
    -------
    tuple of (X, y, scaler, columns)
        X : np.ndarray of shape (n_samples, look_back, n_features)
        y : np.ndarray of scaled next-step target values aligned with X
        scaler : fitted MinMaxScaler (needed to invert predictions)
        columns : ordered list of the scaled column names; the position of
            `target_col` in this list is the index to use when inverse
            transforming model predictions.
    """
    df_copy = df.copy()

    # Feature engineering (can be expanded).
    df_copy['returns'] = df_copy['Close'].pct_change()
    df_copy['ma10'] = df_copy['Close'].rolling(window=10).mean()
    df_copy['ma30'] = df_copy['Close'].rolling(window=30).mean()
    df_copy = df_copy.dropna()

    # Deduplicate while PRESERVING order. The previous list(set(...)) gave
    # an arbitrary column order per run, which made the target-column index
    # (and any downstream inverse transform keyed to it) unreliable.
    # dict.fromkeys keeps first-seen order deterministically.
    all_cols = list(dict.fromkeys(features_cols + [target_col] + ['returns', 'ma10', 'ma30']))

    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(df_copy[all_cols].values)

    # With a deterministic column order, the target index is a plain lookup;
    # no heuristic fallback needed.
    target_idx_in_scaled = all_cols.index(target_col)

    X, y = [], []
    for i in range(look_back, len(scaled_data)):
        X.append(scaled_data[i - look_back:i])
        y.append(scaled_data[i, target_idx_in_scaled])  # next-step target

    return np.array(X), np.array(y), scaler, all_cols

def create_deep_mql_model(input_shape):
    """
    Build and compile a small stacked-LSTM regressor for MQL-like tasks.

    Parameters
    ----------
    input_shape : tuple
        (look_back, n_features) shape of each input sequence.

    Returns
    -------
    A compiled Keras Sequential model (Adam optimizer, MSE loss) whose single
    linear output unit represents, e.g., a predicted price or a value from
    which a trading signal can be derived.
    """
    net = Sequential()
    net.add(LSTM(units=50, return_sequences=True, input_shape=input_shape))
    net.add(Dropout(0.2))
    net.add(LSTM(units=50, return_sequences=False))
    net.add(Dropout(0.2))
    net.add(Dense(units=25, activation='relu'))
    net.add(Dense(units=1))  # single regression output
    net.compile(optimizer='adam', loss='mean_squared_error')
    return net

def generate_trading_signals(model, data_X, scaler, last_known_price, threshold=0.005, target_col_index=0):
    """
    Generate a trading signal from a model's next-step prediction.

    Predicts the next scaled value, inverse-transforms it back to price
    space, and compares it with `last_known_price`.

    Parameters
    ----------
    model : object with a `.predict(X)` method returning scaled predictions.
    data_X : np.ndarray
        Input sequence(s), shape (look_back, n_features) or
        (batch, look_back, n_features).
    scaler : fitted scaler with `inverse_transform` (e.g., MinMaxScaler).
    last_known_price : float
        Unscaled reference price to compare the prediction against.
    threshold : float
        Relative move required to trigger a Buy/Sell instead of Hold.
    target_col_index : int
        Position of the target column within the scaler's feature layout
        (was a hard-coded placeholder 0; now a parameter so callers can pass
        the index returned by preprocessing). Defaults to 0 for backward
        compatibility.

    Returns
    -------
    tuple of (signal, predicted_value)
        signal: 1 (Buy), -1 (Sell), 0 (Hold). On an inverse-transform
        failure returns (0, None) so the result always unpacks into two
        values (the previous bare-0 return broke tuple-unpacking callers).
    """
    # Model expects a batch axis: (1, look_back, n_features).
    if data_X.ndim == 2:
        data_X = np.expand_dims(data_X, axis=0)

    predicted_scaled_value = model.predict(data_X)

    # The scaler was fitted on num_features columns, so build a zero row per
    # prediction and place the scaled prediction in the target column before
    # inverse transforming.
    num_features = data_X.shape[2]
    dummy_array = np.zeros((len(predicted_scaled_value), num_features))
    dummy_array[:, target_col_index] = predicted_scaled_value.ravel()

    try:
        predicted_value = scaler.inverse_transform(dummy_array)[:, target_col_index]
    except ValueError as e:
        print(f"Error during inverse_transform: {e}")
        print("Ensure dummy_array shape matches scaler's n_features_in_.")
        print(f"dummy_array shape: {dummy_array.shape}, scaler.n_features_in_: {scaler.n_features_in_}")
        # Keep the two-tuple shape on failure so callers can always unpack.
        return 0, None

    signal = 0
    if predicted_value > last_known_price * (1 + threshold):
        signal = 1  # Buy
    elif predicted_value < last_known_price * (1 - threshold):
        signal = -1  # Sell
    return signal, predicted_value[0]


if __name__ == '__main__':
    # Example usage. In a real setup the raw data would come from a data
    # agent, e.g.:
    #   from agents.financial_data_agent import fetch_historical_ohlcv
    #   raw_data_mql = fetch_historical_ohlcv("MSFT", period="2y", interval="1d")

    # For demonstration, build a dummy OHLCV DataFrame and add a sine trend
    # to 'Close' so the model has something learnable.
    dates_mql = pd.date_range(start='2022-01-01', periods=500, freq='B')
    data_mql_np = np.random.rand(500, 5) * 150 + 50
    raw_data_mql = pd.DataFrame(data_mql_np, index=dates_mql, columns=['Open', 'High', 'Low', 'Close', 'Volume'])
    raw_data_mql['Close'] = raw_data_mql['Close'] + np.sin(np.linspace(0, 20, 500)) * 20 # Add some trend

    if not raw_data_mql.empty:
        look_back_mql = 60
        # Use more features for DeepMQL
        features_mql = ['Close', 'Volume', 'Open', 'High', 'Low']
        target_mql = 'Close'

        X_mql, y_mql, scaler_mql, scaled_cols_mql = preprocess_data_for_deep_mql(
            raw_data_mql,
            look_back=look_back_mql,
            features_cols=features_mql,
            target_col=target_mql
        )

        if X_mql.shape[0] > 0:
            print(f"X_mql shape: {X_mql.shape}, y_mql shape: {y_mql.shape}")

            # 1. Create and train the model on the first 80% of sequences.
            model_mql = create_deep_mql_model(input_shape=(X_mql.shape[1], X_mql.shape[2]))
            print("Training DeepMQL model (example)...")
            train_size = int(len(X_mql) * 0.8)
            X_train_mql, y_train_mql = X_mql[:train_size], y_mql[:train_size]

            # Keras expects a 2-D target array.
            if y_train_mql.ndim == 1:
                y_train_mql = y_train_mql.reshape(-1, 1)

            model_mql.fit(X_train_mql, y_train_mql, epochs=10, batch_size=32, verbose=1) # Few epochs for demo
            print("DeepMQL model trained.")

            # 2. Generate a signal for the most recent sequence.
            if len(X_mql) > train_size:
                last_sequence = X_mql[-1]  # latest available sequence; would be live data in a real scenario

                # Reference price: the last actual close in the raw data.
                # NOTE(demo simplification): ideally this would be the close
                # of the final day *inside* last_sequence; tracking that
                # exact index through dropna/windowing is omitted here, so
                # the comparison is approximate but adequate for a demo.
                last_actual_close_price = raw_data_mql[target_mql].iloc[-1]

                print(f"\nGenerating signal based on last sequence (shape: {last_sequence.shape})...")
                print(f"Last actual close price for reference: {last_actual_close_price}")

                signal, predicted_price = generate_trading_signals(
                    model_mql,
                    last_sequence,
                    scaler_mql, # Pass the scaler used for X_mql
                    last_actual_close_price,
                    threshold=0.001 # Smaller threshold for demo
                )
                print(f"Predicted Next '{target_mql}': {predicted_price:.2f}")
                if signal == 1:
                    print("Signal: BUY")
                elif signal == -1:
                    print("Signal: SELL")
                else:
                    print("Signal: HOLD")
            else:
                print("Not enough data to generate a signal after training split.")
        else:
            print("X_mql is empty. Check preprocessing for DeepMQL.")
    else:
        print("Raw data for MQL is empty. Check data fetching.")