File size: 8,071 Bytes
d1a316c
 
 
af365c7
d1a316c
 
 
 
 
 
 
 
af365c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d1a316c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af365c7
d1a316c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af365c7
d1a316c
 
af365c7
d1a316c
 
 
af365c7
 
 
d1a316c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af365c7
 
 
 
d1a316c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af365c7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
import argparse
import os
from datetime import timedelta, timezone
from zoneinfo import ZoneInfo

import numpy as np
import pandas as pd
import pytz
import ta
from binance.client import Client
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# --- CLI timeframe selection -------------------------------------------------
# User-facing tokens resolved to the Binance API kline-interval constants.
interval_map = {
    token: getattr(Client, f"KLINE_INTERVAL_{suffix}")
    for token, suffix in [
        ("1m", "1MINUTE"), ("3m", "3MINUTE"), ("5m", "5MINUTE"),
        ("15m", "15MINUTE"), ("30m", "30MINUTE"),
        ("1h", "1HOUR"), ("4h", "4HOUR"), ("1d", "1DAY"),
    ]
}

parser = argparse.ArgumentParser(description="Binance Trend Forecaster with adjustable timeframe")
parser.add_argument("--interval", type=str, default="4h",
                    choices=list(interval_map),
                    help="Time interval for klines (e.g. '1h', '4h', '1d')")
args = parser.parse_args()

# The Binance constant actually passed to the API calls below.
interval = interval_map[args.interval]

# Dual-sink logger used throughout the run; per-asset blank lines are written
# by the caller, not here.
def log_results(message, filename="predictions_results.txt"):
    """Echo *message* to stdout and append it, newline-terminated, to *filename*."""
    print(message)
    with open(filename, "a") as fh:
        fh.write(f"{message}\n")

# Convert UTC timestamp to Europe/Paris timezone
def convert_to_paris_time(utc_time):
    """Format a naive UTC datetime as 'YYYY-mm-dd HH:MM:SS' in Europe/Paris.

    Parameters
    ----------
    utc_time : datetime.datetime (or pandas.Timestamp)
        Timestamp expressed in UTC. Any existing tzinfo is overwritten with
        UTC, matching the previous pytz-based behavior.

    Returns
    -------
    str
        Wall-clock time in Europe/Paris (DST handled by the tz database).
    """
    # stdlib zoneinfo replaces the pytz localize/astimezone dance.
    paris_time = utc_time.replace(tzinfo=timezone.utc).astimezone(ZoneInfo('Europe/Paris'))
    return paris_time.strftime('%Y-%m-%d %H:%M:%S')

# Public Binance client (no API keys required for klines/exchange info).
client = Client()

# One results file per timeframe so runs don't overwrite each other.
result_file = f"predictions_results_{args.interval}.txt"

# Fresh start: discard any results from a previous run.
if os.path.exists(result_file):
    os.remove(result_file)

# Write the header row before any asset is processed.
with open(result_file, "w") as f:
    f.write("Asset,Time,Price,Prediction,Optimal_UP_TP,Optimal_UP_SL,Optimal_DN_TP,Optimal_DN_SL\n")

# Every actively trading pair quoted in USDT.
symbols = [
    info['symbol']
    for info in client.get_exchange_info()['symbols']
    if info['status'] == 'TRADING' and info['quoteAsset'] == 'USDT'
]

# Optimize take-profit / stop-loss function
def optimize_tp_sl(df, signals, side, pgrid, lgrid, horizon=10):
    """Grid-search the (take-profit, stop-loss) pair maximizing average return.

    For every bar where ``signals == side``, the trade is entered at that
    bar's close and scanned forward up to ``horizon`` bars; the first bar
    whose return reaches +tp or -sl closes the trade. Realized returns are
    capped at ``max(tp, sl)`` in magnitude.

    Parameters
    ----------
    df : pandas.DataFrame with a 'close' column.
    signals : array-like of per-bar labels compared against *side*.
    side : 1 for long entries (price up = profit), anything else for short.
    pgrid, lgrid : iterables of candidate take-profit / stop-loss fractions.
    horizon : int, default 10
        Maximum number of bars a trade is held (was hard-coded before).

    Returns
    -------
    (tp, sl, avg_ret) : best grid point and its mean capped return;
        (0, 0, -inf) if no trade ever hits an exit level.
    """
    best_tp, best_sl, best_ret = 0, 0, -np.inf
    prices = df['close'].values
    n = len(prices)  # hoisted loop invariant
    entry_idxs = np.where(signals == side)[0]
    for tp in pgrid:
        for sl in lgrid:
            rets = []
            for i in entry_idxs:
                entry = prices[i]
                # Scan forward at most `horizon` bars for an exit.
                for j in range(i + 1, min(i + horizon + 1, n)):
                    ret = (prices[j] - entry) / entry if side == 1 else (entry - prices[j]) / entry
                    if ret >= tp or ret <= -sl:
                        # Cap the realized move at the wider of tp/sl.
                        rets.append(np.sign(ret) * min(abs(ret), max(tp, sl)))
                        break
            if rets:
                avg_ret = np.mean(rets)
                if avg_ret > best_ret:
                    best_tp, best_sl, best_ret = tp, sl, avg_ret
    return best_tp, best_sl, best_ret

# ---------------------------------------------------------------------------
# Main loop: for each USDT pair — refresh the local kline cache, compute
# technical-indicator features, train a RandomForest on Ichimoku-cloud labels,
# log the latest trend prediction, and grid-search TP/SL for both directions.
# ---------------------------------------------------------------------------
for symbol in symbols:
    log_results(f"=== {symbol} ({args.interval}) ===", result_file)

    # Load cached history if present; otherwise download the full history.
    data_file = f"{symbol}_data_{args.interval}_full.csv"
    if os.path.exists(data_file):
        df = pd.read_csv(data_file, index_col=0, parse_dates=True)
        last_ts = df.index[-1]
        # Resume one bar after the last cached timestamp; the conditional
        # chain maps the CLI interval token to its length in minutes
        # (falls through to 1440 for '1d').
        start = (last_ts + timedelta(**{
            'minutes':1 if args.interval=='1m' else 3 if args.interval=='3m' else 5 if args.interval=='5m' else 15 if args.interval=='15m' else 30 if args.interval=='30m' else 60 if args.interval=='1h' else 240 if args.interval=='4h' else 1440
        })).strftime("%d %B %Y %H:%M:%S")
        new = client.get_historical_klines(symbol, interval, start)
        if new:
            # Binance kline payload: keep OHLCV, discard the bookkeeping fields.
            new_df = pd.DataFrame(new, columns=[
                'timestamp','open','high','low','close','volume',
                'close_time','quote_av','trades','tb_base_av','tb_quote_av','ignore'
            ])
            new_df = new_df[['timestamp','open','high','low','close','volume']].astype(float)
            # Millisecond epoch -> naive UTC timestamps (convert_to_paris_time
            # later assumes UTC).
            new_df['timestamp'] = pd.to_datetime(new_df['timestamp'], unit='ms')
            new_df.set_index('timestamp', inplace=True)
            # NOTE(review): drop_duplicates() compares row VALUES, not the
            # index — a re-downloaded bar whose volume changed would survive
            # as a duplicate timestamp; consider
            # df[~df.index.duplicated(keep='last')] instead. Confirm intent.
            df = pd.concat([df, new_df]).drop_duplicates()
            df.to_csv(data_file)
    else:
        # No cache: pull everything since 01 December 2021 and persist it.
        klines = client.get_historical_klines(symbol, interval, "01 December 2021")
        df = pd.DataFrame(klines, columns=[
            'timestamp','open','high','low','close','volume',
            'close_time','quote_av','trades','tb_base_av','tb_quote_av','ignore'
        ])
        df = df[['timestamp','open','high','low','close','volume']].astype(float)
        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
        df.set_index('timestamp', inplace=True)
        df.to_csv(data_file)

    # Feature engineering: momentum, trend, volatility and Ichimoku spans.
    df['rsi'] = ta.momentum.RSIIndicator(df['close'], window=14).rsi()
    df['macd'] = ta.trend.MACD(df['close']).macd()
    for s in [10, 20, 50, 100]:
        df[f'ema_{s}'] = df['close'].ewm(span=s).mean()
    for w in [10, 20, 50, 100]:
        df[f'sma_{w}'] = df['close'].rolling(window=w).mean()
    # Bollinger band width normalized by the middle band.
    bb = ta.volatility.BollingerBands(df['close'], window=20, window_dev=2)
    df['bbw'] = (bb.bollinger_hband() - bb.bollinger_lband()) / bb.bollinger_mavg()
    df['atr'] = ta.volatility.AverageTrueRange(df['high'], df['low'], df['close'], window=14).average_true_range()
    df['adx'] = ta.trend.ADXIndicator(df['high'], df['low'], df['close'], window=14).adx()
    st = ta.momentum.StochasticOscillator(df['high'], df['low'], df['close'], window=14)
    df['st_k'] = st.stoch()
    df['st_d'] = st.stoch_signal()
    df['wr'] = ta.momentum.WilliamsRIndicator(df['high'], df['low'], df['close'], lbp=14).williams_r()
    df['cci'] = ta.trend.CCIIndicator(df['high'], df['low'], df['close'], window=20).cci()
    # 10-bar raw momentum (close minus close 10 bars ago).
    df['mom'] = df['close'] - df['close'].shift(10)
    ichi = ta.trend.IchimokuIndicator(df['high'], df['low'], window1=9, window2=26, window3=52)
    df['span_a'] = ichi.ichimoku_a()
    df['span_b'] = ichi.ichimoku_b()
    # Drop warm-up rows where the longest-window indicators are NaN.
    df.dropna(inplace=True)

    # Label each bar by position relative to the Ichimoku cloud:
    # 1 = close above both spans (uptrend), 0 = below both (downtrend),
    # -1 = inside the cloud (neutral).
    df['signal'] = np.select([
        (df['close'] > df['span_a']) & (df['close'] > df['span_b']),
        (df['close'] < df['span_a']) & (df['close'] < df['span_b'])
    ], [1, 0], default=-1)

    # Chronological train/test split (shuffle=False keeps time order,
    # avoiding look-ahead leakage between train and test).
    features = [c for c in df.columns if c not in ['open','high','low','close','volume','signal']]
    X, y = df[features], df['signal']
    Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=0.2, shuffle=False)
    model = RandomForestClassifier(n_estimators=200, class_weight='balanced', random_state=42)
    model.fit(Xtr, ytr)
    ypr = model.predict(Xte)

    # Out-of-sample classification quality on the held-out tail.
    report = classification_report(yte, ypr, zero_division=0)
    log_results(f"Classification report for {symbol}:\n{report}", result_file)

    # Predict the trend for the most recent bar (DataFrame slice keeps
    # feature names, avoiding sklearn's feature-name warning).
    latest_df = X.iloc[-1:]
    trend_label = model.predict(latest_df)[0]

    # Localize the latest bar's timestamp and log the prediction.
    pred_time_utc = df.index[-1]
    pred_time = convert_to_paris_time(pred_time_utc)
    pred_price = df['close'].iloc[-1]
    trend_str = {1:'Uptrend',0:'Downtrend',-1:'Neutral'}[trend_label]
    log_results(f"Time: {pred_time}, Price: {pred_price:.2f}, Prediction: {trend_str}", result_file)

    # Grid-search TP/SL (1%..9% in 1% steps) for long (1) and short (0)
    # signals. NOTE(review): model.predict(X) includes the training rows, so
    # these are in-sample signals — optimistic by construction; confirm
    # whether out-of-sample signals were intended.
    hist_sign = model.predict(X)
    pgrid = np.arange(0.01, 0.1, 0.01)
    lgrid = np.arange(0.01, 0.1, 0.01)
    up_tp, up_sl, _ = optimize_tp_sl(df, hist_sign, 1, pgrid, lgrid)
    dn_tp, dn_sl, _ = optimize_tp_sl(df, hist_sign, 0, pgrid, lgrid)
    log_results(f"Optimal UP TP/SL: +{up_tp*100:.1f}% / -{up_sl*100:.1f}%", result_file)
    log_results(f"Optimal DN TP/SL: +{dn_tp*100:.1f}% / -{dn_sl*100:.1f}%", result_file)

    # Blank separator line between asset blocks in the results file.
    with open(result_file, "a") as f:
        f.write("\n")

# End of processing
log_results("All assets processed.", result_file)