# File size: 5,455 Bytes
# 9cb5a00
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
"""
ML-3m-trader Data Fetcher
==========================
Connects to MetaTrader 5 and downloads one year of 3-minute OHLCV data
for XAUUSDc, and provides helpers to save and reload the raw CSV for
reproducibility.

Uses copy_rates_from_pos (position-based fetch) because copy_rates_range
returns 'Invalid params' on certain MT5 builds.
"""

import os
import sys
from datetime import datetime, timezone

import numpy as np
import pandas as pd

import config as cfg


def _ensure_dirs():
    """Create the configured data directory; do nothing if it already exists."""
    data_dir = cfg.DATA_DIR
    os.makedirs(data_dir, exist_ok=True)


def _to_utc(value) -> pd.Timestamp:
    """Return *value* as a UTC-aware Timestamp (naive inputs are taken as UTC)."""
    ts = pd.Timestamp(value)
    if ts.tzinfo is None:
        return ts.tz_localize("UTC")
    return ts.tz_convert("UTC")


def _rates_to_frame(rates) -> pd.DataFrame:
    """Turn an MT5 rates array into a DataFrame with UTC `time` and `volume`."""
    frame = pd.DataFrame(rates)
    frame["time"] = pd.to_datetime(frame["time"], unit="s", utc=True)
    return frame.rename(columns={"tick_volume": "volume"})


def _resample_bars(frame: pd.DataFrame, timeframe_minutes: int) -> pd.DataFrame:
    """Aggregate finer bars (indexed by `time`) into `timeframe_minutes` bars."""
    agg = {"open": "first", "high": "max", "low": "min",
           "close": "last", "volume": "sum", "spread": "last"}
    # Only aggregate columns that are actually present in the fetched data.
    agg = {col: how for col, how in agg.items() if col in frame.columns}
    resampled = (
        frame.set_index("time")
        .resample(f"{timeframe_minutes}min")
        .agg(agg)
        # Bins with no source bars have a NaN open; drop them.
        .dropna(subset=["open"])
    )
    return resampled.reset_index()


def fetch_mt5(
    symbol: str = cfg.SYMBOL,
    timeframe_minutes: int = cfg.TIMEFRAME_MINUTES,
    *,
    date_from: str = "2025-01-01",
    date_to: str = "2026-01-01",
) -> pd.DataFrame:
    """
    Fetch historical bars from a running MetaTrader 5 terminal using
    copy_rates_from_pos (position-based).  Fetches the most recent bars,
    then filters to the half-open range [date_from, date_to).

    If the terminal returns no bars for the requested timeframe, M1 bars
    are fetched instead and resampled to the requested timeframe.

    Parameters
    ----------
    symbol : str
        Symbol to download.
    timeframe_minutes : int
        Bar size in minutes; one of 1, 3, 5, 15, 30, 60.
    date_from : str
        Inclusive start of the range to keep.  Anything pd.Timestamp
        accepts works; naive values are interpreted as UTC.
    date_to : str
        Exclusive end of the range to keep (same conventions as date_from).

    Returns
    -------
    pd.DataFrame
        Columns: time, open, high, low, close, volume, spread

    Raises
    ------
    SystemExit
        With exit code 1 when the MetaTrader5 package is missing, the
        terminal cannot be initialised, the symbol or timeframe is
        invalid, no data is returned, or no bars fall in the date range.
    """
    try:
        import MetaTrader5 as mt5
    except ImportError:
        print("[ERROR] MetaTrader5 package not installed. Run: pip install MetaTrader5")
        sys.exit(1)

    if not mt5.initialize():
        print(f"[ERROR] MT5 initialize failed: {mt5.last_error()}")
        sys.exit(1)

    # Everything that talks to the terminal runs inside try/finally so the
    # connection is always released, and so last_error() is always read
    # while the connection is still open.
    try:
        # Verify symbol
        si = mt5.symbol_info(symbol)
        if si is None:
            print(f"[ERROR] Symbol '{symbol}' not found: {mt5.last_error()}")
            sys.exit(1)

        if not mt5.symbol_select(symbol, True):
            print(f"[WARN] Could not select symbol '{symbol}': {mt5.last_error()}")
        print(f"[INFO] Symbol '{symbol}' visible={si.visible}, bid={si.bid}, spread={si.spread}")

        # Resolve timeframe
        tf_map = {
            1: mt5.TIMEFRAME_M1,
            3: mt5.TIMEFRAME_M3,
            5: mt5.TIMEFRAME_M5,
            15: mt5.TIMEFRAME_M15,
            30: mt5.TIMEFRAME_M30,
            60: mt5.TIMEFRAME_H1,
        }
        tf = tf_map.get(timeframe_minutes)
        if tf is None:
            print(f"[ERROR] Unsupported timeframe: {timeframe_minutes}m")
            sys.exit(1)

        # Position-based fetch counting back from the most recent bar.
        # A 24h day holds at most 480 M3 bars, so a year is under ~176k
        # bars and 500,000 leaves ample headroom.
        # NOTE(review): in the M1 fallback a full year can exceed 500,000
        # bars (1,440 per 24h day), and the terminal may cap available
        # history, so the oldest part of the range may be missing.
        max_bars = 500_000
        print(f"[INFO] Fetching up to {max_bars:,} M{timeframe_minutes} bars via copy_rates_from_pos...")

        rates = mt5.copy_rates_from_pos(symbol, tf, 0, max_bars)

        # `rates` is an array (or None), so test its length explicitly
        # rather than relying on truthiness.
        resample_from_m1 = rates is None or len(rates) == 0
        if resample_from_m1:
            # Fallback: direct fetch failed, so fetch M1 and resample.
            print(f"[WARN] No M{timeframe_minutes} data. Fetching M1 and resampling...")
            rates = mt5.copy_rates_from_pos(symbol, mt5.TIMEFRAME_M1, 0, max_bars)
            if rates is None or len(rates) == 0:
                print(f"[ERROR] No M1 data either: {mt5.last_error()}")
                sys.exit(1)
    finally:
        mt5.shutdown()

    df = _rates_to_frame(rates)
    if resample_from_m1:
        df = _resample_bars(df, timeframe_minutes)
        print(f"[INFO] Resampled M1 -> M{timeframe_minutes}: {len(df):,} bars")
    else:
        print(f"[INFO] Fetched {len(df):,} M{timeframe_minutes} bars directly")

    # Keep only needed columns
    cols = ["time", "open", "high", "low", "close", "volume", "spread"]
    df = df[[c for c in cols if c in df.columns]].copy()
    if "spread" not in df.columns:
        df["spread"] = 0

    # Filter to the half-open date range [date_from, date_to)
    range_start = _to_utc(date_from)
    range_end = _to_utc(date_to)
    before = len(df)
    in_range = (df["time"] >= range_start) & (df["time"] < range_end)
    df = df[in_range].reset_index(drop=True)
    print(f"[INFO] Filtered to {range_start.date()} to {range_end.date()}: "
          f"{len(df):,} bars (dropped {before - len(df):,} outside range)")

    if df.empty:
        print("[ERROR] No bars in the target date range.")
        sys.exit(1)

    print(f"[INFO] Final dataset: {len(df):,} bars "
          f"({df['time'].iloc[0]} to {df['time'].iloc[-1]})")

    return df


def save_csv(df: pd.DataFrame, filename: str = None) -> str:
    """
    Write ``df`` as a CSV file (without the index) into the data directory.

    When ``filename`` is None the name is built from the configured symbol
    (lower-cased) and timeframe.  The data directory is created first if
    needed.  Returns the path of the written file.
    """
    _ensure_dirs()
    target_name = (
        f"raw_{cfg.SYMBOL.lower()}_m{cfg.TIMEFRAME_MINUTES}.csv"
        if filename is None
        else filename
    )
    target_path = os.path.join(cfg.DATA_DIR, target_name)
    df.to_csv(target_path, index=False)
    row_count = len(df)
    print(f"[INFO] Saved {row_count:,} rows to {target_path}")
    return target_path


def load_csv(filename: str = None) -> pd.DataFrame:
    """
    Read a previously saved CSV from the data directory.

    When ``filename`` is None the default name built from the configured
    symbol (lower-cased) and timeframe is used.  The ``time`` column is
    parsed as dates.  If the file does not exist, an error is printed and
    the process exits with status 1.
    """
    source_name = (
        f"raw_{cfg.SYMBOL.lower()}_m{cfg.TIMEFRAME_MINUTES}.csv"
        if filename is None
        else filename
    )
    source_path = os.path.join(cfg.DATA_DIR, source_name)

    if os.path.exists(source_path):
        frame = pd.read_csv(source_path, parse_dates=["time"])
        print(f"[INFO] Loaded {len(frame):,} rows from {source_path}")
        return frame

    # Missing file: tell the user how to produce it, then abort.
    print(f"[ERROR] Data file not found: {source_path}")
    print("        Run 'python main.py fetch' first.")
    sys.exit(1)