# File size: 6,063 Bytes
# 9cb5a00
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
"""
ML-3m-trader Main Entry Point
================================
Unified CLI for the full ML trading pipeline.

Usage:
    python main.py fetch       Fetch 1-year of 3m data from MetaTrader 5
    python main.py train       Build features, generate labels, train model
    python main.py backtest    Run walk-forward backtest on test set
    python main.py evaluate    Print performance metrics report
    python main.py run         Full pipeline: fetch -> train -> backtest -> evaluate
    python main.py predict     Predict on latest data using saved model
"""

import argparse
import os
import sys
import time

import numpy as np
import pandas as pd

import config as cfg


def cmd_fetch(args):
    """Handle the ``fetch`` command: pull bars and write them to CSV.

    Calls ``data_fetcher.fetch_mt5`` to obtain the data, hands the result to
    ``data_fetcher.save_csv``, and prints how many rows were saved.

    Args:
        args: Parsed CLI namespace; not read by this command.
    """
    # Imported lazily so other sub-commands do not need this dependency loaded.
    from data_fetcher import fetch_mt5, save_csv

    print("\n--- FETCH DATA ---")
    bars = fetch_mt5()
    save_csv(bars)
    bar_count = len(bars)
    print(f"[DONE] {bar_count:,} bars saved.\n")


def cmd_train(args):
    """Handle the ``train`` command: features, labels, snapshot, model fit.

    Steps, in order:
      1. Load the stored bars with ``load_csv`` and run ``build_features``.
      2. Generate labels and attach them as a ``label`` column.
      3. Write the labelled frame to ``featured_data.csv`` in ``cfg.DATA_DIR``
         (the directory is created if missing).
      4. Train a model on the frame and labels, then persist it.

    Args:
        args: Parsed CLI namespace; not read by this command.
    """
    from data_fetcher import load_csv
    from features import build_features
    from labeler import generate_labels
    from model import train as train_model, save_model

    print("\n--- TRAIN ---")
    started = time.time()

    # Raw bars -> engineered feature frame.
    frame = build_features(load_csv())

    # Labels are kept both as a column (for the CSV snapshot) and as a
    # separate object (passed to the trainer below).
    targets = generate_labels(frame)
    frame["label"] = targets

    # Snapshot of the featured + labelled data.
    os.makedirs(cfg.DATA_DIR, exist_ok=True)
    snapshot_path = os.path.join(cfg.DATA_DIR, "featured_data.csv")
    frame.to_csv(snapshot_path, index=False)
    print(f"[INFO] Featured data saved to {snapshot_path}")

    # Fit and persist.
    fitted = train_model(frame, targets)
    save_model(fitted)

    duration = time.time() - started
    print(f"\n[DONE] Training complete in {duration:.1f}s\n")


def cmd_backtest(args):
    """Handle the ``backtest`` command using the previously saved model.

    Rebuilds the feature frame from the stored CSV, keeps the trailing
    ``1 - cfg.TRAIN_SPLIT_RATIO`` share of rows as the test set, predicts on
    it with the loaded model, runs the backtester, then prints the formatted
    metrics report and saves both the report and the trade list.

    Args:
        args: Parsed CLI namespace; not read by this command.
    """
    from data_fetcher import load_csv
    from features import build_features
    from labeler import generate_labels
    from model import load_model, predict
    from backtester import run_backtest
    from metrics import (
        compute_metrics, format_report, save_report, save_trades_csv,
    )

    print("\n--- BACKTEST ---")
    started = time.time()

    # Same preparation path as training: raw bars -> features.
    frame = build_features(load_csv())
    # NOTE(review): the labels are not used anywhere below; the call is kept
    # in case generate_labels has side effects -- confirm and drop if not.
    labels = generate_labels(frame)

    # Chronological split: everything from the cut-off row onwards is test data.
    cutoff = int(len(frame) * cfg.TRAIN_SPLIT_RATIO)
    holdout = frame.iloc[cutoff:].reset_index(drop=True)

    # Model signals for the hold-out rows.
    signals = predict(load_model(), holdout)

    # Simulate trading on the hold-out rows.
    outcome = run_backtest(holdout, signals)

    # Summarise, show and persist the results.
    stats = compute_metrics(outcome["trades"], outcome["equity_curve"])
    summary = format_report(stats)
    print(summary)
    save_report(summary)
    save_trades_csv(outcome["trades"])

    duration = time.time() - started
    print(f"[DONE] Backtest complete in {duration:.1f}s\n")


def cmd_evaluate(args):
    """Handle the ``evaluate`` command: print the stored report file.

    Reads ``report.txt`` from ``cfg.RESULTS_DIR`` and echoes it to stdout.
    If the file does not exist, prints an error message and exits with
    status 1.

    Args:
        args: Parsed CLI namespace; not read by this command.
    """
    saved_report = os.path.join(cfg.RESULTS_DIR, "report.txt")

    if os.path.exists(saved_report):
        with open(saved_report, "r", encoding="utf-8") as handle:
            contents = handle.read()
        print(contents)
        return

    # No report on disk yet: tell the user how to produce one and fail.
    print("[ERROR] No report found. Run 'python main.py backtest' first.")
    sys.exit(1)


def cmd_predict(args):
    """Handle the ``predict`` command: show class probabilities for recent bars.

    Builds features for the stored data, asks the saved model for per-class
    probabilities, and prints a table for the last (up to) ten rows together
    with the name of the highest-probability class from ``cfg.LABEL_NAMES``.

    Args:
        args: Parsed CLI namespace; not read by this command.
    """
    from data_fetcher import load_csv
    from features import build_features
    from model import load_model, predict_proba

    print("\n--- PREDICT ---")
    frame = build_features(load_csv())

    proba = predict_proba(load_model(), frame)

    # Only the tail of the data is displayed (fewer rows if the frame is short).
    total_rows = len(frame)
    shown = min(10, total_rows)
    first_shown = total_rows - shown

    print(f"\nLast {shown} predictions (probability per class):")
    print(f"{'Bar Time':<22} {'DO_NOTHING':>10} {'BUY':>10} {'SELL':>10} {'HOLD':>10}")
    print("-" * 65)

    row_idx = first_shown
    while row_idx < total_rows:
        bar_time = df_time = frame["time"].iloc[row_idx]
        p = proba[row_idx]
        # Winning class = position of the largest probability.
        best = np.argmax(p)
        print(f"{str(bar_time):<22} {p[0]:>10.4f} {p[1]:>10.4f} {p[2]:>10.4f} {p[3]:>10.4f}  -> {cfg.LABEL_NAMES[best]}")
        row_idx += 1
    print()


def cmd_run(args):
    """Handle the ``run`` command: fetch, train and backtest back to back.

    Each stage receives the same ``args`` object. ``cmd_evaluate`` is not
    called here; the backtest stage already prints the report it produces.

    Args:
        args: Parsed CLI namespace, forwarded unchanged to every stage.
    """
    pipeline = (cmd_fetch, cmd_train, cmd_backtest)
    for stage in pipeline:
        stage(args)


def main():
    """Parse the command line and invoke the matching ``cmd_*`` handler.

    With no sub-command the help text is printed and the process exits with
    status 0. An unknown sub-command is rejected by argparse itself.
    """
    description_text = (
        "Machine learning trading system for XAUUSDc on the 3-minute timeframe.\n"
        "Uses LightGBM for multi-class signal prediction with a vectorized backtester."
    )
    epilog_text = (
        "Examples:\n"
        "  python main.py fetch       Fetch 1-year of 3m bars from MT5\n"
        "  python main.py train       Feature engineering + label + train\n"
        "  python main.py backtest    Backtest on test set with metrics\n"
        "  python main.py evaluate    Re-print saved report\n"
        "  python main.py predict     Show latest predictions\n"
        "  python main.py run         Full pipeline (fetch->train->backtest)\n"
    )
    parser = argparse.ArgumentParser(
        prog="ML-3m-trader",
        description=description_text,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=epilog_text,
    )

    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # (name, help) pairs in the order they should appear in ``--help``.
    command_help = (
        ("fetch", "Fetch 1-year of 3m data from MetaTrader 5"),
        ("train", "Build features, generate labels, train model"),
        ("backtest", "Run walk-forward backtest on test set"),
        ("evaluate", "Print saved performance report"),
        ("predict", "Show predictions on latest data"),
        ("run", "Full pipeline: fetch -> train -> backtest -> evaluate"),
    )
    for command_name, help_text in command_help:
        subparsers.add_parser(command_name, help=help_text)

    parsed = parser.parse_args()

    # Bare invocation: show usage rather than failing.
    if parsed.command is None:
        parser.print_help()
        sys.exit(0)

    handlers = {
        "fetch": cmd_fetch,
        "train": cmd_train,
        "backtest": cmd_backtest,
        "evaluate": cmd_evaluate,
        "predict": cmd_predict,
        "run": cmd_run,
    }
    handler = handlers[parsed.command]
    handler(parsed)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()