algorembrant's picture
Upload 61 files
9cb5a00 verified
"""
ML-3m-trader Main Entry Point
================================
Unified CLI for the full ML trading pipeline.
Usage:
    python main.py fetch       Fetch 1-year of 3m data from MetaTrader 5
    python main.py train       Build features, generate labels, train model
    python main.py backtest    Run walk-forward backtest on test set
    python main.py evaluate    Print performance metrics report
    python main.py run         Full pipeline: fetch -> train -> backtest (prints the report)
    python main.py predict     Predict on latest data using saved model
"""
import argparse
import os
import sys
import time
import numpy as np
import pandas as pd
import config as cfg
def cmd_fetch(args):
    """Fetch data with ``fetch_mt5()`` and hand it to ``save_csv()``.

    Prints a banner before fetching and the number of saved bars afterwards.

    Args:
        args: Parsed CLI namespace; not read by this command.
    """
    # Imported here rather than at module level, so data_fetcher is only
    # loaded when this command actually runs.
    from data_fetcher import fetch_mt5, save_csv

    print("\n--- FETCH DATA ---")

    bars = fetch_mt5()
    save_csv(bars)

    bar_count = len(bars)
    print(f"[DONE] {bar_count:,} bars saved.\n")
def cmd_train(args):
    """Build features, generate labels, then train and save a model.

    Steps, in order:
      1. ``load_csv()`` -> ``build_features()`` -> ``generate_labels()``.
      2. Attach the labels as a ``"label"`` column and write the frame to
         ``featured_data.csv`` inside ``cfg.DATA_DIR`` (created if missing).
      3. ``train_model()`` on the frame and labels, then ``save_model()``.

    Args:
        args: Parsed CLI namespace; not read by this command.
    """
    from data_fetcher import load_csv
    from features import build_features
    from labeler import generate_labels
    from model import train as train_model, save_model

    print("\n--- TRAIN ---")
    started = time.time()

    # Raw bars -> feature frame -> labels.
    featured = build_features(load_csv())
    labels = generate_labels(featured)
    featured["label"] = labels

    # Persist the labelled feature table before training.
    os.makedirs(cfg.DATA_DIR, exist_ok=True)
    feat_path = os.path.join(cfg.DATA_DIR, "featured_data.csv")
    featured.to_csv(feat_path, index=False)
    print(f"[INFO] Featured data saved to {feat_path}")

    # NOTE(review): the frame handed to training still carries the "label"
    # column -- confirm that train() excludes it from the feature set.
    fitted = train_model(featured, labels)
    save_model(fitted)

    duration = time.time() - started
    print(f"\n[DONE] Training complete in {duration:.1f}s\n")
def cmd_backtest(args):
    """Run a backtest on the chronological test split using the saved model.

    The data is loaded and featurised, and the rows from index
    ``int(len(df) * cfg.TRAIN_SPLIT_RATIO)`` to the end (re-indexed from 0)
    form the test set. The saved model's predictions on that set are passed
    to ``run_backtest()``. Metrics are computed from the resulting trades
    and equity curve; the formatted report is printed and saved, and the
    trades are saved as CSV.

    Args:
        args: Parsed CLI namespace; not read by this command.
    """
    from data_fetcher import load_csv
    from features import build_features
    from model import load_model, predict
    from backtester import run_backtest
    from metrics import (
        compute_metrics, format_report, save_report, save_trades_csv,
    )

    print("\n--- BACKTEST ---")
    t0 = time.time()

    # Load and prepare data. Labels are intentionally not generated here:
    # nothing in the prediction/backtest/metrics path below consumes them.
    df = load_csv()
    df = build_features(df)

    # Split chronologically: everything after the training portion is test.
    split_idx = int(len(df) * cfg.TRAIN_SPLIT_RATIO)
    df_test = df.iloc[split_idx:].reset_index(drop=True)

    # Predict with the previously saved model.
    model = load_model()
    preds = predict(model, df_test)

    # Backtest on the test rows using those predictions.
    results = run_backtest(df_test, preds)

    # Metrics: print the report, then persist report and trades.
    m = compute_metrics(results["trades"], results["equity_curve"])
    report = format_report(m)
    print(report)
    save_report(report)
    save_trades_csv(results["trades"])

    elapsed = time.time() - t0
    print(f"[DONE] Backtest complete in {elapsed:.1f}s\n")
def cmd_evaluate(args):
    """Print the contents of the previously saved report file.

    The report is read from ``report.txt`` inside ``cfg.RESULTS_DIR``. If that
    path does not exist, an error message is printed and the process exits
    with status 1.

    Args:
        args: Parsed CLI namespace; not read by this command.
    """
    report_path = os.path.join(cfg.RESULTS_DIR, "report.txt")

    if os.path.exists(report_path):
        with open(report_path, "r", encoding="utf-8") as report_file:
            contents = report_file.read()
        print(contents)
        return

    # No saved report yet: tell the user how to produce one and fail.
    print("[ERROR] No report found. Run 'python main.py backtest' first.")
    sys.exit(1)
def cmd_predict(args):
    """Print per-class probabilities for the most recent bars.

    Loads the data, builds features, loads the saved model and calls
    ``predict_proba()`` on the whole frame. The last ``min(10, len(df))`` rows
    are printed as a table with columns DO_NOTHING, BUY, SELL and HOLD,
    followed by the name (from ``cfg.LABEL_NAMES``) of the arg-max class.

    Args:
        args: Parsed CLI namespace; not read by this command.
    """
    from data_fetcher import load_csv
    from features import build_features
    from model import load_model, predict_proba

    print("\n--- PREDICT ---")

    frame = build_features(load_csv())
    proba = predict_proba(load_model(), frame)

    # Show last 10 predictions (fewer if the frame is shorter).
    total = len(frame)
    shown = min(10, total)
    print(f"\nLast {shown} predictions (probability per class):")
    print(f"{'Bar Time':<22} {'DO_NOTHING':>10} {'BUY':>10} {'SELL':>10} {'HOLD':>10}")
    print("-" * 65)

    # NOTE(review): proba is indexed with the frame's positional row number,
    # i.e. it is assumed to have one row per frame row -- confirm against
    # predict_proba().
    for row in range(total - shown, total):
        bar_time = df_time = frame["time"].iloc[row]
        probs = proba[row]
        winner = np.argmax(probs)
        cells = " ".join(f"{probs[k]:>10.4f}" for k in range(4))
        print(f"{str(bar_time):<22} {cells} -> {cfg.LABEL_NAMES[winner]}")

    print()
def cmd_run(args):
    """Run the pipeline stages fetch -> train -> backtest in that order.

    Each stage receives the same ``args`` object. ``cmd_evaluate`` is not
    invoked here.

    Args:
        args: Parsed CLI namespace, forwarded unchanged to every stage.
    """
    for stage in (cmd_fetch, cmd_train, cmd_backtest):
        stage(args)
def main():
    """Parse the command line and run the selected sub-command.

    Registers the sub-commands fetch, train, backtest, evaluate, predict and
    run. When no sub-command is given, the help text is printed and the
    process exits with status 0. Otherwise the matching ``cmd_*`` handler is
    called with the parsed namespace.
    """
    description = (
        "Machine learning trading system for XAUUSDc on the 3-minute timeframe.\n"
        "Uses LightGBM for multi-class signal prediction with a vectorized backtester."
    )
    epilog = (
        "Examples:\n"
        " python main.py fetch Fetch 1-year of 3m bars from MT5\n"
        " python main.py train Feature engineering + label + train\n"
        " python main.py backtest Backtest on test set with metrics\n"
        " python main.py evaluate Re-print saved report\n"
        " python main.py predict Show latest predictions\n"
        " python main.py run Full pipeline (fetch->train->backtest)\n"
    )
    parser = argparse.ArgumentParser(
        prog="ML-3m-trader",
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=epilog,
    )

    # Sub-command name -> help text, registered in display order.
    command_help = (
        ("fetch", "Fetch 1-year of 3m data from MetaTrader 5"),
        ("train", "Build features, generate labels, train model"),
        ("backtest", "Run walk-forward backtest on test set"),
        ("evaluate", "Print saved performance report"),
        ("predict", "Show predictions on latest data"),
        ("run", "Full pipeline: fetch -> train -> backtest -> evaluate"),
    )
    subparsers = parser.add_subparsers(dest="command", help="Available commands")
    for command_name, help_text in command_help:
        subparsers.add_parser(command_name, help=help_text)

    args = parser.parse_args()

    # The sub-command is optional for argparse, so handle its absence here.
    if args.command is None:
        parser.print_help()
        sys.exit(0)

    handlers = {
        "fetch": cmd_fetch,
        "train": cmd_train,
        "backtest": cmd_backtest,
        "evaluate": cmd_evaluate,
        "predict": cmd_predict,
        "run": cmd_run,
    }
    handler = handlers[args.command]
    handler(args)


# Run the CLI only when this file is executed as a script.
if __name__ == "__main__":
    main()