File size: 6,063 Bytes
9cb5a00 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 | """
ML-3m-trader Main Entry Point
================================
Unified CLI for the full ML trading pipeline.
Usage:
    python main.py fetch      Fetch 1-year of 3m data from MetaTrader 5
    python main.py train      Build features, generate labels, train model
    python main.py backtest   Run walk-forward backtest on test set
    python main.py evaluate   Print performance metrics report
    python main.py run        Full pipeline: fetch -> train -> backtest -> evaluate
    python main.py predict    Predict on latest data using saved model
"""
import argparse
import os
import sys
import time
import numpy as np
import pandas as pd
import config as cfg
def cmd_fetch(args):
    """Fetch bars with ``fetch_mt5`` and persist them with ``save_csv``.

    Args:
        args: Parsed CLI namespace; not used by this command.
    """
    # Imported inside the function, so data_fetcher is only loaded when
    # this command actually runs.
    from data_fetcher import fetch_mt5, save_csv

    print("\n--- FETCH DATA ---")
    bars = fetch_mt5()
    save_csv(bars)
    print(f"[DONE] {len(bars):,} bars saved.\n")
def cmd_train(args):
    """Run the training stage: features, labels, then model fitting.

    Loads the stored bars, builds features, generates labels, writes the
    labelled feature table to ``featured_data.csv`` under ``cfg.DATA_DIR``,
    then trains a model and saves it. The elapsed wall-clock time is
    printed at the end.

    Args:
        args: Parsed CLI namespace; not used by this command.
    """
    from data_fetcher import load_csv
    from features import build_features
    from labeler import generate_labels
    from model import train as train_model, save_model

    print("\n--- TRAIN ---")
    started = time.time()

    # Raw bars -> feature table -> labels attached as an extra column.
    frame = build_features(load_csv())
    labels = generate_labels(frame)
    frame["label"] = labels

    # Persist the labelled feature table before training starts.
    os.makedirs(cfg.DATA_DIR, exist_ok=True)
    out_path = os.path.join(cfg.DATA_DIR, "featured_data.csv")
    frame.to_csv(out_path, index=False)
    print(f"[INFO] Featured data saved to {out_path}")

    # NOTE(review): `frame` already contains the "label" column when it is
    # handed to train_model; presumably the trainer selects its own feature
    # columns -- confirm so the target cannot leak into the inputs.
    fitted = train_model(frame, labels)
    save_model(fitted)

    duration = time.time() - started
    print(f"\n[DONE] Training complete in {duration:.1f}s\n")
def cmd_backtest(args):
    """Backtest the saved model on the chronological tail of the data.

    Rebuilds the feature table, keeps the rows from index
    ``int(len(df) * cfg.TRAIN_SPLIT_RATIO)`` onwards as the test set,
    predicts with the loaded model, runs the backtest, then prints and
    saves the metrics report and saves the trade list.

    Args:
        args: Parsed CLI namespace; not used by this command.
    """
    from data_fetcher import load_csv
    from features import build_features
    from labeler import generate_labels
    from model import load_model, predict
    from backtester import run_backtest
    from metrics import (
        compute_metrics,
        format_report,
        save_report,
        save_trades_csv,
    )

    print("\n--- BACKTEST ---")
    started = time.time()

    frame = build_features(load_csv())
    # NOTE(review): the labels returned here are never used in this
    # function. The call is kept in case generate_labels has side effects;
    # confirm and drop it if it is pure.
    generate_labels(frame)

    # Chronological split: everything after the training portion is test.
    cut = int(len(frame) * cfg.TRAIN_SPLIT_RATIO)
    test_frame = frame.iloc[cut:].reset_index(drop=True)

    model = load_model()
    predictions = predict(model, test_frame)

    outcome = run_backtest(test_frame, predictions)

    stats = compute_metrics(outcome["trades"], outcome["equity_curve"])
    report = format_report(stats)
    print(report)
    save_report(report)
    save_trades_csv(outcome["trades"])

    duration = time.time() - started
    print(f"[DONE] Backtest complete in {duration:.1f}s\n")
def cmd_evaluate(args):
    """Print the report stored as ``report.txt`` under ``cfg.RESULTS_DIR``.

    If the file does not exist, an error message is printed and the
    process exits with status 1.

    Args:
        args: Parsed CLI namespace; not used by this command.
    """
    saved_report = os.path.join(cfg.RESULTS_DIR, "report.txt")

    if os.path.exists(saved_report):
        with open(saved_report, "r", encoding="utf-8") as handle:
            print(handle.read())
        return

    print("[ERROR] No report found. Run 'python main.py backtest' first.")
    sys.exit(1)
def cmd_predict(args):
    """Print class probabilities for the most recent bars.

    Builds features for the stored data, scores every row with the saved
    model and prints a table for the last (at most) ten rows: the bar
    time, four class probabilities and the name of the highest-scoring
    class taken from ``cfg.LABEL_NAMES``.

    Args:
        args: Parsed CLI namespace; not used by this command.
    """
    from data_fetcher import load_csv
    from features import build_features
    from model import load_model, predict_proba

    print("\n--- PREDICT ---")
    frame = build_features(load_csv())
    model = load_model()
    probabilities = predict_proba(model, frame)

    total = len(frame)
    shown = min(10, total)
    print(f"\nLast {shown} predictions (probability per class):")
    print(f"{'Bar Time':<22} {'DO_NOTHING':>10} {'BUY':>10} {'SELL':>10} {'HOLD':>10}")
    print("-" * 65)

    for row in range(total - shown, total):
        bar_time = frame["time"].iloc[row]
        probs = probabilities[row]
        winner = np.argmax(probs)  # index of the most probable class
        print(f"{bar_time!s:<22} {probs[0]:>10.4f} {probs[1]:>10.4f} {probs[2]:>10.4f} {probs[3]:>10.4f} -> {cfg.LABEL_NAMES[winner]}")
    print()
def cmd_run(args):
    """Run the fetch, train and backtest commands in sequence.

    No separate evaluate step is invoked: ``cmd_backtest`` already prints
    and saves the report that ``cmd_evaluate`` would re-print.

    Args:
        args: Parsed CLI namespace, forwarded unchanged to every stage.
    """
    for stage in (cmd_fetch, cmd_train, cmd_backtest):
        stage(args)
def main():
    """Parse the command line and run the selected sub-command.

    With no sub-command the help text is printed and the process exits
    with status 0. Otherwise the matching ``cmd_*`` handler is called with
    the parsed namespace.
    """
    description = (
        "Machine learning trading system for XAUUSDc on the 3-minute timeframe.\n"
        "Uses LightGBM for multi-class signal prediction with a vectorized backtester."
    )
    epilog = (
        "Examples:\n"
        " python main.py fetch Fetch 1-year of 3m bars from MT5\n"
        " python main.py train Feature engineering + label + train\n"
        " python main.py backtest Backtest on test set with metrics\n"
        " python main.py evaluate Re-print saved report\n"
        " python main.py predict Show latest predictions\n"
        " python main.py run Full pipeline (fetch->train->backtest)\n"
    )
    parser = argparse.ArgumentParser(
        prog="ML-3m-trader",
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=epilog,
    )

    # (name, help) pairs, registered in the order they appear in --help.
    command_help = (
        ("fetch", "Fetch 1-year of 3m data from MetaTrader 5"),
        ("train", "Build features, generate labels, train model"),
        ("backtest", "Run walk-forward backtest on test set"),
        ("evaluate", "Print saved performance report"),
        ("predict", "Show predictions on latest data"),
        ("run", "Full pipeline: fetch -> train -> backtest -> evaluate"),
    )
    subparsers = parser.add_subparsers(dest="command", help="Available commands")
    for name, help_text in command_help:
        subparsers.add_parser(name, help=help_text)

    args = parser.parse_args()

    # No sub-command given: show usage instead of failing.
    if args.command is None:
        parser.print_help()
        sys.exit(0)

    handlers = {
        "fetch": cmd_fetch,
        "train": cmd_train,
        "backtest": cmd_backtest,
        "evaluate": cmd_evaluate,
        "predict": cmd_predict,
        "run": cmd_run,
    }
    handlers[args.command](args)
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|