# algorembrant's picture
# Upload 61 files
# 9cb5a00 verified
"""
ML-3m-trader Data Fetcher
==========================
Connects to MetaTrader 5 and downloads one year of 3-minute OHLCV data
for XAUUSDc. Saves the raw CSV for reproducibility.
Uses copy_rates_from_pos (position-based fetch) because copy_rates_range
returns 'Invalid params' on certain MT5 builds.
"""
import os
import sys
from datetime import datetime, timezone
import numpy as np
import pandas as pd
import config as cfg
def _ensure_dirs():
    """Create the directory named by ``cfg.DATA_DIR`` if it does not exist."""
    data_dir = cfg.DATA_DIR
    # exist_ok=True makes repeated calls harmless.
    os.makedirs(data_dir, exist_ok=True)
def _to_utc_timestamp(value) -> pd.Timestamp:
    """Return ``value`` as a timezone-aware UTC ``pd.Timestamp``.

    Naive inputs are interpreted as UTC; aware inputs are converted to UTC.
    """
    stamp = pd.Timestamp(value)
    if stamp.tzinfo is None:
        return stamp.tz_localize("UTC")
    return stamp.tz_convert("UTC")


def fetch_mt5(
    symbol: str = cfg.SYMBOL,
    timeframe_minutes: int = cfg.TIMEFRAME_MINUTES,
    date_from="2025-01-01",
    date_to="2026-01-01",
) -> pd.DataFrame:
    """
    Fetch historical bars from a running MetaTrader 5 terminal using
    copy_rates_from_pos (position-based). Fetches the most recent bars
    (up to a fixed cap), then filters them to ``[date_from, date_to)``.

    If the terminal returns no bars for the requested timeframe, M1 bars
    are fetched instead and resampled to the requested timeframe.

    Parameters
    ----------
    symbol
        Symbol name as known to the terminal.
    timeframe_minutes
        Bar length in minutes. Supported: 1, 3, 5, 15, 30, 60.
    date_from
        Inclusive start of the range to keep. Anything ``pd.Timestamp``
        accepts; naive values are treated as UTC.
    date_to
        Exclusive end of the range to keep. Same conventions as
        ``date_from``.

    Returns
    -------
    pd.DataFrame
        Columns: time, open, high, low, close, volume, spread

    Raises
    ------
    SystemExit
        With exit code 1 when the MetaTrader5 package is missing, the
        terminal cannot be initialised, the symbol or timeframe is not
        available, no bars are returned, or no bars fall in the range.
    """
    try:
        import MetaTrader5 as mt5
    except ImportError:
        print("[ERROR] MetaTrader5 package not installed. Run: pip install MetaTrader5")
        sys.exit(1)

    range_start = _to_utc_timestamp(date_from)
    range_end = _to_utc_timestamp(date_to)

    if not mt5.initialize():
        print(f"[ERROR] MT5 initialize failed: {mt5.last_error()}")
        sys.exit(1)

    # Everything that talks to the terminal lives inside try/finally so the
    # connection is released on every exit path, including sys.exit() and
    # unexpected exceptions.
    try:
        # Verify symbol
        si = mt5.symbol_info(symbol)
        if si is None:
            print(f"[ERROR] Symbol '{symbol}' not found: {mt5.last_error()}")
            sys.exit(1)
        if not mt5.symbol_select(symbol, True):
            # Best effort: keep going, but make the failure visible.
            print(f"[WARN] Could not select symbol '{symbol}': {mt5.last_error()}")
        # `si` was read before symbol_select, so `visible` is the prior state.
        print(f"[INFO] Symbol '{symbol}' visible={si.visible}, bid={si.bid}, spread={si.spread}")

        # Resolve timeframe
        tf_map = {
            1: mt5.TIMEFRAME_M1,
            3: mt5.TIMEFRAME_M3,
            5: mt5.TIMEFRAME_M5,
            15: mt5.TIMEFRAME_M15,
            30: mt5.TIMEFRAME_M30,
            60: mt5.TIMEFRAME_H1,
        }
        tf = tf_map.get(timeframe_minutes)
        if tf is None:
            print(f"[ERROR] Unsupported timeframe: {timeframe_minutes}m")
            sys.exit(1)

        # Grab a large chunk counting back from the most recent bar.
        # At most 480 three-minute bars fit in a day (~175,000 per year),
        # so 500,000 leaves ample headroom for a one-year M3 range.
        max_bars = 500_000
        print(f"[INFO] Fetching up to {max_bars:,} M{timeframe_minutes} bars via copy_rates_from_pos...")
        rates = mt5.copy_rates_from_pos(symbol, tf, 0, max_bars)
        fetched_directly = rates is not None and len(rates) > 0

        # Fallback: if the direct fetch fails, fetch M1 and resample.
        # NOTE(review): 500,000 M1 bars span about 347 days of
        # round-the-clock data, so the oldest part of a one-year range may
        # be missing on this path — confirm against the symbol's sessions.
        rates_m1 = None
        if not fetched_directly:
            print(f"[WARN] No M{timeframe_minutes} data. Fetching M1 and resampling...")
            rates_m1 = mt5.copy_rates_from_pos(symbol, mt5.TIMEFRAME_M1, 0, max_bars)
            if rates_m1 is None or len(rates_m1) == 0:
                # Read last_error() while still connected; after shutdown()
                # it no longer describes this failure.
                print(f"[ERROR] No M1 data either: {mt5.last_error()}")
                sys.exit(1)
    finally:
        mt5.shutdown()

    if fetched_directly:
        df = pd.DataFrame(rates)
        df["time"] = pd.to_datetime(df["time"], unit="s", utc=True)
        df.rename(columns={"tick_volume": "volume"}, inplace=True)
        print(f"[INFO] Fetched {len(df):,} M{timeframe_minutes} bars directly")
    else:
        df = pd.DataFrame(rates_m1)
        df["time"] = pd.to_datetime(df["time"], unit="s", utc=True)
        df.rename(columns={"tick_volume": "volume"}, inplace=True)
        df.set_index("time", inplace=True)
        agg = {"open": "first", "high": "max", "low": "min",
               "close": "last", "volume": "sum", "spread": "last"}
        # Only include columns that exist
        agg = {k: v for k, v in agg.items() if k in df.columns}
        # Buckets with no M1 bars come out as NaN rows; drop them.
        df = df.resample(f"{timeframe_minutes}min").agg(agg).dropna(subset=["open"])
        df.reset_index(inplace=True)
        print(f"[INFO] Resampled M1 -> M{timeframe_minutes}: {len(df):,} bars")

    # Keep only needed columns
    cols = ["time", "open", "high", "low", "close", "volume", "spread"]
    df = df[[c for c in cols if c in df.columns]].copy()
    if "spread" not in df.columns:
        df["spread"] = 0

    # Filter to the half-open range [range_start, range_end)
    before = len(df)
    df = df[(df["time"] >= range_start) & (df["time"] < range_end)].copy()
    df.reset_index(drop=True, inplace=True)
    print(f"[INFO] Filtered to {range_start.date()} to {range_end.date()}: "
          f"{len(df):,} bars (dropped {before - len(df):,} outside range)")

    if df.empty:
        print("[ERROR] No bars in the target date range.")
        sys.exit(1)

    print(f"[INFO] Final dataset: {len(df):,} bars "
          f"({df['time'].iloc[0]} to {df['time'].iloc[-1]})")
    return df
def save_csv(df: pd.DataFrame, filename: str = None) -> str:
    """Write ``df`` to a CSV file inside ``cfg.DATA_DIR`` and return its path.

    When ``filename`` is None the name is built from ``cfg.SYMBOL``
    (lower-cased) and ``cfg.TIMEFRAME_MINUTES``. The data directory is
    created first if needed, and the index is not written to the file.
    """
    _ensure_dirs()
    target_name = (
        filename
        if filename is not None
        else f"raw_{cfg.SYMBOL.lower()}_m{cfg.TIMEFRAME_MINUTES}.csv"
    )
    out_path = os.path.join(cfg.DATA_DIR, target_name)
    df.to_csv(out_path, index=False)
    print(f"[INFO] Saved {len(df):,} rows to {out_path}")
    return out_path
def load_csv(filename: str = None) -> pd.DataFrame:
    """Read a CSV from ``cfg.DATA_DIR`` and return it as a DataFrame.

    When ``filename`` is None the name is built from ``cfg.SYMBOL``
    (lower-cased) and ``cfg.TIMEFRAME_MINUTES``. The ``time`` column is
    parsed as dates. If the file does not exist, an error is printed and
    the process exits with status 1.
    """
    source_name = (
        filename
        if filename is not None
        else f"raw_{cfg.SYMBOL.lower()}_m{cfg.TIMEFRAME_MINUTES}.csv"
    )
    csv_path = os.path.join(cfg.DATA_DIR, source_name)

    if os.path.exists(csv_path):
        frame = pd.read_csv(csv_path, parse_dates=["time"])
        print(f"[INFO] Loaded {len(frame):,} rows from {csv_path}")
        return frame

    # Missing file: tell the user how to produce it, then stop.
    print(f"[ERROR] Data file not found: {csv_path}")
    print(" Run 'python main.py fetch' first.")
    sys.exit(1)