Spaces:

Alvin3y1
/

test

Sleeping

App Files Community

Alvin3y1 committed 15 days ago

Commit

2503fda

verified ·

1 Parent(s): df9c251

Update app.py

Browse files

Files changed (1) hide show

app.py +93 -47

app.py CHANGED Viewed

@@ -8,10 +8,13 @@ import numpy as np
 from aiohttp import web
 from sklearn.ensemble import RandomForestRegressor
 SYMBOL_KRAKEN = "BTC/USD"
 PORT = 7860
 BROADCAST_RATE = 1.0
-PREDICTION_HORIZON = 100
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
@@ -35,96 +38,132 @@ def calculate_indicators(candles):
     for c in cols:
         df[c] = df[c].astype(float)
-    df['ema'] = df['close'].ewm(span=20, adjust=False).mean()
-    df['ema_fast'] = df['close'].ewm(span=9, adjust=False).mean()
-    df['ema_slow'] = df['close'].ewm(span=50, adjust=False).mean()
-    df['sma20'] = df['close'].rolling(window=20).mean()
     df['std'] = df['close'].rolling(window=20).std()
-    df['bb_upper'] = df['sma20'] + (df['std'] * 2)
-    df['bb_lower'] = df['sma20'] - (df['std'] * 2)
-    df['bb_mid'] = df['sma20']
     delta = df['close'].diff()
     gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
     loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
     rs = gain / loss
     df['rsi'] = 100 - (100 / (1 + rs))
     k = df['close'].ewm(span=12, adjust=False).mean()
     d = df['close'].ewm(span=26, adjust=False).mean()
     df['macd'] = k - d
     df['macd_signal'] = df['macd'].ewm(span=9, adjust=False).mean()
     df['macd_hist'] = df['macd'] - df['macd_signal']
-    low_min = df['low'].rolling(window=14).min()
-    high_max = df['high'].rolling(window=14).max()
-    df['stoch_k'] = 100 * ((df['close'] - low_min) / (high_max - low_min))
-    df['stoch_d'] = df['stoch_k'].rolling(window=3).mean()
     df['tr0'] = abs(df['high'] - df['low'])
     df['tr1'] = abs(df['high'] - df['close'].shift())
     df['tr2'] = abs(df['low'] - df['close'].shift())
     df['tr'] = df[['tr0', 'tr1', 'tr2']].max(axis=1)
     df['atr'] = df['tr'].rolling(window=14).mean()
-    df['obv'] = (np.sign(df['close'].diff()) * df['volume']).fillna(0).cumsum()
-    df['tp'] = (df['high'] + df['low'] + df['close']) / 3
-    df['vwap'] = (df['tp'] * df['volume']).cumsum() / df['volume'].cumsum()
     return df
 def train_model(df):
-    logging.info("Training ML Model...")
-    feature_cols = ['close', 'ema', 'bb_upper', 'bb_lower', 'rsi', 'macd', 'stoch_k', 'atr', 'obv', 'vwap']
     data = df.dropna().copy()
-    future_shifts = {}
     targets = []
     for i in range(1, PREDICTION_HORIZON + 1):
-        col_name = f'target_{i}'
-        future_shifts[col_name] = data['close'].shift(-i)
         targets.append(col_name)
-    target_df = pd.DataFrame(future_shifts, index=data.index)
-    data = pd.concat([data, target_df], axis=1)
     data = data.dropna()
-    if len(data) < 100:
         logging.warning("Not enough data to train model yet.")
         return None
     X = data[feature_cols].values
     y = data[targets].values
-    model = RandomForestRegressor(n_estimators=50, max_depth=10, n_jobs=-1, random_state=42)
     model.fit(X, y)
-    logging.info(f"Model Trained on {len(X)} samples.")
     return model
 def get_prediction(df, model):
     if model is None:
         return []
-    feature_cols = ['close', 'ema', 'bb_upper', 'bb_lower', 'rsi', 'macd', 'stoch_k', 'atr', 'obv', 'vwap']
     last_row = df.iloc[[-1]][feature_cols]
     if last_row.isnull().values.any():
         return []
-    prediction = model.predict(last_row.values)[0]
     current_time = int(df.iloc[-1]['time'])
     pred_data = []
-    for i, price in enumerate(prediction):
         pred_data.append({
-            "time": current_time + ((i + 1) * 60),
-            "value": float(price)
         })
     return pred_data
@@ -133,23 +172,28 @@ def process_market_data():
     if not market_state['ready'] or not market_state['ohlc_history']:
         return {"error": "Initializing..."}
     df = calculate_indicators(market_state['ohlc_history'])
     if df is None or len(df) < 50:
         return {"error": "Not enough data"}
-    if market_state['model'] is None or (time.time() - market_state['last_training_time'] > 900):
         try:
             market_state['model'] = train_model(df)
             market_state['last_training_time'] = time.time()
         except Exception as e:
             logging.error(f"Training failed: {e}")
     predictions = []
     try:
         predictions = get_prediction(df, market_state['model'])
     except Exception as e:
         logging.error(f"Prediction failed: {e}")
     df_clean = df.replace([np.inf, -np.inf], np.nan)
     df_clean = df_clean.astype(object).where(pd.notnull(df_clean), None)
@@ -160,12 +204,12 @@ def process_market_data():
     market_state['last_price'] = last_close
     market_state['price_change'] = price_change
-    full_data = df_clean.to_dict('records')
     last_row = df.iloc[-1] if len(df) > 0 else {}
     return {
-        "data": full_data,
         "prediction": predictions,
         "stats": {
             "price": last_close,
@@ -177,6 +221,7 @@ def process_market_data():
         }
     }
 HTML_PAGE = """
 <!DOCTYPE html>
 <html lang="en">
@@ -458,10 +503,6 @@ HTML_PAGE = """
             <span class="indicator-label">MACD</span>
             <span id="macd-val" class="indicator-value">--</span>
         </div>
-        <div class="indicator-group">
-            <span class="indicator-label">Stoch K</span>
-            <span id="stoch-val" class="indicator-value" style="color: #ff9800">--</span>
-        </div>
         <div class="indicator-group">
             <span class="indicator-label">Volume</span>
             <span id="vol-val" class="indicator-value" style="color: #888">--</span>
@@ -569,7 +610,8 @@ document.addEventListener('DOMContentLoaded', () => {
         color: '#bf5af2',
         lineWidth: 2,
         lineStyle: LightweightCharts.LineStyle.Dashed,
-        crosshairMarkerVisible: false
     });
     const volumeSeries = volChart.addHistogramSeries({
@@ -640,7 +682,7 @@ document.addEventListener('DOMContentLoaded', () => {
         }
         if (lastData) {
-            document.getElementById('ema-val').textContent = lastData.ema ? lastData.ema.toFixed(2) : '--';
             document.getElementById('bb-upper').textContent = lastData.bb_upper ? lastData.bb_upper.toFixed(2) : '--';
             document.getElementById('bb-lower').textContent = lastData.bb_lower ? lastData.bb_lower.toFixed(2) : '--';
@@ -651,7 +693,6 @@ document.addEventListener('DOMContentLoaded', () => {
                 macdEl.style.color = macdVal >= 0 ? '#26a69a' : '#ef5350';
             }
-            document.getElementById('stoch-val').textContent = lastData.stoch_k ? lastData.stoch_k.toFixed(1) : '--';
             document.getElementById('vol-val').textContent = lastData.volume ? lastData.volume.toFixed(2) : '--';
         }
     }
@@ -700,7 +741,7 @@ document.addEventListener('DOMContentLoaded', () => {
                 if (candleData.length > 0) {
                     candles.setData(candleData);
-                    const emaData = safeMap(d, 'ema');
                     if (emaData.length > 0) ema.setData(emaData);
                     const bbUpperData = safeMap(d, 'bb_upper');
@@ -770,6 +811,7 @@ document.addEventListener('DOMContentLoaded', () => {
 async def fetch_initial_data():
     try:
         async with aiohttp.ClientSession() as session:
             url = "https://api.kraken.com/0/public/OHLC?pair=XBTUSD&interval=1"
             async with session.get(url, timeout=aiohttp.ClientTimeout(total=30)) as response:
                 if response.status == 200:
@@ -787,7 +829,7 @@ async def fetch_initial_data():
                                         'close': float(c[4]),
                                         'volume': float(c[6])
                                     }
-                                    for c in raw[-720:]
                                 ]
                                 market_state['ready'] = True
                                 logging.info(f"Loaded {len(market_state['ohlc_history'])} initial candles")
@@ -822,21 +864,25 @@ async def kraken_rest_worker():
                                         for c in raw[-10:]
                                     ]
                                     if market_state['ohlc_history']:
                                         existing_times = {c['time'] for c in market_state['ohlc_history']}
                                         for nc in new_candles:
                                             if nc['time'] in existing_times:
                                                 for i, ec in enumerate(market_state['ohlc_history']):
                                                     if ec['time'] == nc['time']:
                                                         market_state['ohlc_history'][i] = nc
                                                         break
                                             else:
                                                 market_state['ohlc_history'].append(nc)
                                         market_state['ohlc_history'].sort(key=lambda x: x['time'])
-                                        if len(market_state['ohlc_history']) > 800:
-                                            market_state['ohlc_history'] = market_state['ohlc_history'][-800:]
                                     market_state['ready'] = True
                                     break

 from aiohttp import web
 from sklearn.ensemble import RandomForestRegressor
+# --- CONFIGURATION ---
 SYMBOL_KRAKEN = "BTC/USD"
 PORT = 7860
 BROADCAST_RATE = 1.0
+PREDICTION_HORIZON = 100  # Predict next 100 candles
+MAX_HISTORY = 5000        # Store up to 5000 candles for training
+TRAIN_INTERVAL = 300      # Retrain model every 5 minutes
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
     for c in cols:
         df[c] = df[c].astype(float)
+    # --- Standard Indicators ---
+    df['ema20'] = df['close'].ewm(span=20, adjust=False).mean()
+    df['ema50'] = df['close'].ewm(span=50, adjust=False).mean()
+    # Bollinger Bands
     df['std'] = df['close'].rolling(window=20).std()
+    df['bb_upper'] = df['ema20'] + (df['std'] * 2)
+    df['bb_lower'] = df['ema20'] - (df['std'] * 2)
+    # RSI
     delta = df['close'].diff()
     gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
     loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
     rs = gain / loss
     df['rsi'] = 100 - (100 / (1 + rs))
+    # MACD
     k = df['close'].ewm(span=12, adjust=False).mean()
     d = df['close'].ewm(span=26, adjust=False).mean()
     df['macd'] = k - d
     df['macd_signal'] = df['macd'].ewm(span=9, adjust=False).mean()
     df['macd_hist'] = df['macd'] - df['macd_signal']
+    # ATR
     df['tr0'] = abs(df['high'] - df['low'])
     df['tr1'] = abs(df['high'] - df['close'].shift())
     df['tr2'] = abs(df['low'] - df['close'].shift())
     df['tr'] = df[['tr0', 'tr1', 'tr2']].max(axis=1)
     df['atr'] = df['tr'].rolling(window=14).mean()
+    # --- FEATURE ENGINEERING (Normalization) ---
+    # We create features that represent % differences rather than raw prices
+    # This helps the model learn patterns regardless of whether BTC is $20k or $100k
+    # Distance from EMAs (Percentage)
+    df['dist_ema20'] = (df['close'] - df['ema20']) / df['ema20']
+    df['dist_ema50'] = (df['close'] - df['ema50']) / df['ema50']
+    # Bollinger Band Width & Position
+    df['bb_width'] = (df['bb_upper'] - df['bb_lower']) / df['ema20']
+    df['bb_pos'] = (df['close'] - df['bb_lower']) / (df['bb_upper'] - df['bb_lower'])
+    # Volume Change
+    df['vol_change'] = df['volume'].pct_change()
+    # Log Returns (Momentum)
+    df['log_ret'] = np.log(df['close'] / df['close'].shift(1))
     return df
 def train_model(df):
+    logging.info(f"Training ML Model on {len(df)} candles...")
+    # Use normalized features for input
+    feature_cols = [
+        'rsi', 'macd_hist', 'atr',
+        'dist_ema20', 'dist_ema50',
+        'bb_width', 'bb_pos',
+        'vol_change', 'log_ret'
+    ]
     data = df.dropna().copy()
+    # --- CREATE TARGETS (Percentage Change) ---
     targets = []
+    # We want to predict the % return for the next 1 to N steps relative to CURRENT price
     for i in range(1, PREDICTION_HORIZON + 1):
+        col_name = f'target_return_{i}'
+        # Formula: (Price_Future - Price_Current) / Price_Current
+        data[col_name] = (data['close'].shift(-i) - data['close']) / data['close']
         targets.append(col_name)
     data = data.dropna()
+    if len(data) < 200:
         logging.warning("Not enough data to train model yet.")
         return None
     X = data[feature_cols].values
     y = data[targets].values
+    # Increase estimators for better stability
+    model = RandomForestRegressor(
+        n_estimators=100,
+        max_depth=15,
+        min_samples_split=5,
+        n_jobs=-1,
+        random_state=42
+    )
     model.fit(X, y)
+    logging.info(f"Model Trained successfully.")
     return model
 def get_prediction(df, model):
     if model is None:
         return []
+    feature_cols = [
+        'rsi', 'macd_hist', 'atr',
+        'dist_ema20', 'dist_ema50',
+        'bb_width', 'bb_pos',
+        'vol_change', 'log_ret'
+    ]
     last_row = df.iloc[[-1]][feature_cols]
     if last_row.isnull().values.any():
         return []
+    # The model predicts Percentage Returns
+    predicted_returns = model.predict(last_row.values)[0]
+    # Convert Percentage Returns back to Absolute Prices
+    current_price = df.iloc[-1]['close']
     current_time = int(df.iloc[-1]['time'])
     pred_data = []
+    for i, pct_change in enumerate(predicted_returns):
+        # Reconstruct: Price = Current * (1 + Predicted_Return)
+        future_price = current_price * (1 + pct_change)
         pred_data.append({
+            "time": current_time + ((i + 1) * 60), # Add 60s for each step
+            "value": float(future_price)
         })
     return pred_data
     if not market_state['ready'] or not market_state['ohlc_history']:
         return {"error": "Initializing..."}
+    # 1. Calculate Indicators
     df = calculate_indicators(market_state['ohlc_history'])
     if df is None or len(df) < 50:
         return {"error": "Not enough data"}
+    # 2. Train Model (Periodically)
+    if market_state['model'] is None or (time.time() - market_state['last_training_time'] > TRAIN_INTERVAL):
         try:
             market_state['model'] = train_model(df)
             market_state['last_training_time'] = time.time()
         except Exception as e:
             logging.error(f"Training failed: {e}")
+    # 3. Get Prediction
     predictions = []
     try:
         predictions = get_prediction(df, market_state['model'])
     except Exception as e:
         logging.error(f"Prediction failed: {e}")
+    # 4. Prepare Data for Broadcast
+    # Clean NaNs for JSON
     df_clean = df.replace([np.inf, -np.inf], np.nan)
     df_clean = df_clean.astype(object).where(pd.notnull(df_clean), None)
     market_state['last_price'] = last_close
     market_state['price_change'] = price_change
+    # Only send last 500 candles to client to save bandwidth, but keep full history in memory
+    display_data = df_clean.tail(500).to_dict('records')
     last_row = df.iloc[-1] if len(df) > 0 else {}
     return {
+        "data": display_data,
         "prediction": predictions,
         "stats": {
             "price": last_close,
         }
     }
+# --- FRONTEND HTML (No changes needed, handles price data perfectly) ---
 HTML_PAGE = """
 <!DOCTYPE html>
 <html lang="en">
             <span class="indicator-label">MACD</span>
             <span id="macd-val" class="indicator-value">--</span>
         </div>
         <div class="indicator-group">
             <span class="indicator-label">Volume</span>
             <span id="vol-val" class="indicator-value" style="color: #888">--</span>
         color: '#bf5af2',
         lineWidth: 2,
         lineStyle: LightweightCharts.LineStyle.Dashed,
+        crosshairMarkerVisible: false,
+        title: 'Forecast'
     });
     const volumeSeries = volChart.addHistogramSeries({
         }
         if (lastData) {
+            document.getElementById('ema-val').textContent = lastData.ema20 ? lastData.ema20.toFixed(2) : '--';
             document.getElementById('bb-upper').textContent = lastData.bb_upper ? lastData.bb_upper.toFixed(2) : '--';
             document.getElementById('bb-lower').textContent = lastData.bb_lower ? lastData.bb_lower.toFixed(2) : '--';
                 macdEl.style.color = macdVal >= 0 ? '#26a69a' : '#ef5350';
             }
             document.getElementById('vol-val').textContent = lastData.volume ? lastData.volume.toFixed(2) : '--';
         }
     }
                 if (candleData.length > 0) {
                     candles.setData(candleData);
+                    const emaData = safeMap(d, 'ema20');
                     if (emaData.length > 0) ema.setData(emaData);
                     const bbUpperData = safeMap(d, 'bb_upper');
 async def fetch_initial_data():
     try:
         async with aiohttp.ClientSession() as session:
+            # Although Kraken returns limited data, we set logic to accumulate it over time.
             url = "https://api.kraken.com/0/public/OHLC?pair=XBTUSD&interval=1"
             async with session.get(url, timeout=aiohttp.ClientTimeout(total=30)) as response:
                 if response.status == 200:
                                         'close': float(c[4]),
                                         'volume': float(c[6])
                                     }
+                                    for c in raw
                                 ]
                                 market_state['ready'] = True
                                 logging.info(f"Loaded {len(market_state['ohlc_history'])} initial candles")
                                         for c in raw[-10:]
                                     ]
+                                    # Intelligent Merge to keep history
                                     if market_state['ohlc_history']:
                                         existing_times = {c['time'] for c in market_state['ohlc_history']}
                                         for nc in new_candles:
                                             if nc['time'] in existing_times:
+                                                # Update existing (in case close price changed)
                                                 for i, ec in enumerate(market_state['ohlc_history']):
                                                     if ec['time'] == nc['time']:
                                                         market_state['ohlc_history'][i] = nc
                                                         break
                                             else:
+                                                # Append new
                                                 market_state['ohlc_history'].append(nc)
                                         market_state['ohlc_history'].sort(key=lambda x: x['time'])
+                                        # Keep MAX_HISTORY (5000)
+                                        if len(market_state['ohlc_history']) > MAX_HISTORY:
+                                            market_state['ohlc_history'] = market_state['ohlc_history'][-MAX_HISTORY:]
                                     market_state['ready'] = True
                                     break