OmidSakaki commited on
Commit
ae2aacf
·
verified ·
1 Parent(s): f77d216

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +654 -139
app.py CHANGED
@@ -1,279 +1,794 @@
1
  import gradio as gr
2
  import numpy as np
3
  import torch
 
 
 
 
4
  from pathlib import Path
5
- from typing import Dict, Tuple, Any
6
- from loguru import logger
 
7
  import yaml
8
- from gymnasium import spaces
9
 
 
10
  class TradingConfig:
 
 
 
 
11
  def __init__(self):
 
12
  self.initial_balance = 10000.0
13
  self.max_steps = 1000
14
  self.transaction_cost = 0.001
15
  self.risk_level = "Medium"
16
  self.asset_type = "Crypto"
 
 
17
  self.learning_rate = 0.0001
18
- self.gamma = 0.99
19
  self.epsilon_start = 1.0
20
  self.epsilon_min = 0.01
21
  self.epsilon_decay = 0.9995
22
  self.batch_size = 32
23
  self.memory_size = 10000
24
  self.target_update = 100
 
 
 
 
 
 
 
 
25
 
26
  class AdvancedTradingEnvironment:
27
- def __init__(self, config):
 
 
 
 
 
 
28
  self.initial_balance = config.initial_balance
29
  self.balance = self.initial_balance
30
  self.position = 0.0
31
  self.current_price = 100.0
32
  self.step_count = 0
33
  self.max_steps = config.max_steps
 
 
 
34
  self.price_history = []
 
35
  self.sentiment_history = []
36
- self._initialize_data()
37
- self.action_space = spaces.Discrete(4)
38
- self.observation_space = spaces.Box(low=-2.0, high=2.0, shape=(12,), dtype=np.float32)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
- def _initialize_data(self):
41
- n_points = 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  base_price = 100.0
 
43
  for i in range(n_points):
44
- price = base_price + np.sin(i * 0.1) * 10 + np.random.normal(0, 2)
45
- self.price_history.append(max(10.0, price))
46
- sentiment = 0.5 + np.random.normal(0, 0.1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  self.sentiment_history.append(np.clip(sentiment, 0.0, 1.0))
 
48
  self.current_price = self.price_history[-1]
49
 
50
- def reset(self):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  self.balance = self.initial_balance
52
  self.position = 0.0
53
  self.step_count = 0
 
 
 
 
54
  self.price_history = [100.0 + np.random.normal(0, 5)]
55
- self.sentiment_history = [0.5]
 
 
 
56
  obs = self._get_observation()
57
  info = self._get_info()
 
58
  return obs, info
59
 
60
- def step(self, action):
 
61
  self.step_count += 1
62
- price_change = np.random.normal(0, 0.02)
63
- self.current_price = max(10.0, self.current_price * (1 + price_change))
64
- self.price_history.append(self.current_price)
65
 
66
- sentiment_change = np.random.normal(0, 0.05)
67
- new_sentiment = np.clip(self.sentiment_history[-1] + sentiment_change, 0.0, 1.0)
68
- self.sentiment_history.append(new_sentiment)
69
 
 
70
  reward = self._execute_action(action)
71
 
 
72
  terminated = self.balance <= 0 or self.step_count >= self.max_steps
73
  truncated = False
74
 
 
75
  obs = self._get_observation()
76
  info = self._get_info()
77
 
 
 
 
 
78
  return obs, reward, terminated, truncated, info
79
 
80
- def _execute_action(self, action):
81
- reward = 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  prev_net_worth = self.balance + self.position * self.current_price
 
83
 
84
  if action == 1: # Buy
85
- trade_amount = min(self.balance * 0.2, self.balance)
86
- cost = trade_amount
87
- if cost <= self.balance:
88
- self.position += trade_amount / self.current_price
89
- self.balance -= cost
 
 
90
 
91
  elif action == 2: # Sell
92
  if self.position > 0:
93
- sell_amount = min(self.position * 0.2, self.position)
94
- proceeds = sell_amount * self.current_price
95
- self.position -= sell_amount
 
96
  self.balance += proceeds
97
 
98
- elif action == 3: # Close
99
  if self.position > 0:
100
- proceeds = self.position * self.current_price
101
  self.balance += proceeds
102
  self.position = 0
103
 
104
- net_worth = self.balance + self.position * self.current_price
105
- reward = (net_worth - prev_net_worth) / self.initial_balance * 100
 
106
 
107
- return reward
108
-
109
- def _get_observation(self):
110
- recent_prices = self.price_history[-10:] if len(self.price_history) >= 10 else [self.current_price] * 10
111
- recent_sentiments = self.sentiment_history[-10:] if len(self.sentiment_history) >= 10 else [0.5] * 10
112
 
113
- features = [
114
- self.balance / self.initial_balance,
115
- self.position * self.current_price / self.initial_balance,
 
 
 
 
 
 
116
  self.current_price / 100.0,
117
  np.mean(recent_prices) / 100.0,
118
  np.std(recent_prices) / 100.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  np.mean(recent_sentiments),
120
  np.std(recent_sentiments),
121
- self.step_count / self.max_steps,
122
- 0.0, 0.0, 0.0, 0.0 # Padding
123
  ]
124
 
125
- return np.array(features[:12], dtype=np.float32)
 
 
 
 
 
 
 
 
 
126
 
127
- def _get_info(self):
 
128
  net_worth = self.balance + self.position * self.current_price
129
- return {'net_worth': net_worth}
 
 
 
 
 
 
 
 
 
130
 
131
  class DQNAgent:
132
- def __init__(self, state_dim, action_dim, config, device='cpu'):
 
 
 
 
 
133
  self.device = torch.device(device)
134
- self.q_network = torch.nn.Sequential(
135
- torch.nn.Linear(state_dim, 128),
136
- torch.nn.ReLU(),
137
- torch.nn.Linear(128, 128),
138
- torch.nn.ReLU(),
139
- torch.nn.Linear(128, action_dim)
140
- ).to(self.device)
141
-
142
- self.target_network = torch.nn.Sequential(
143
- torch.nn.Linear(state_dim, 128),
144
- torch.nn.ReLU(),
145
- torch.nn.Linear(128, 128),
146
- torch.nn.ReLU(),
147
- torch.nn.Linear(128, action_dim)
148
- ).to(self.device)
149
 
 
 
 
150
  self.target_network.load_state_dict(self.q_network.state_dict())
151
 
152
- self.optimizer = torch.optim.Adam(self.q_network.parameters(), lr=config.learning_rate)
 
 
 
 
153
  self.memory = deque(maxlen=config.memory_size)
154
- self.gamma = config.gamma
 
155
  self.epsilon = config.epsilon_start
156
  self.epsilon_min = config.epsilon_min
157
  self.epsilon_decay = config.epsilon_decay
 
 
158
  self.batch_size = config.batch_size
 
159
  self.target_update = config.target_update
160
  self.steps = 0
161
 
162
- def select_action(self, state, training=True):
163
- state = torch.FloatTensor(state).unsqueeze(0).to(self.device)
 
 
 
 
 
 
 
 
 
 
 
 
164
  if training and random.random() < self.epsilon:
165
- return random.randint(0, 3)
 
 
 
166
  with torch.no_grad():
167
- return self.q_network(state).argmax(1).item()
 
168
 
169
- def store_transition(self, state, action, reward, next_state, done):
 
 
170
  self.memory.append((state, action, reward, next_state, done))
171
 
172
- def update(self):
 
173
  if len(self.memory) < self.batch_size:
174
  return 0.0
175
 
 
176
  batch = random.sample(self.memory, self.batch_size)
177
  states, actions, rewards, next_states, dones = zip(*batch)
178
 
 
179
  states = torch.FloatTensor(np.array(states)).to(self.device)
180
  actions = torch.LongTensor(actions).to(self.device)
181
  rewards = torch.FloatTensor(rewards).to(self.device)
182
  next_states = torch.FloatTensor(np.array(next_states)).to(self.device)
183
- dones = torch.FloatTensor(dones).to(self.device)
 
 
 
184
 
185
- current_q = self.q_network(states).gather(1, actions.unsqueeze(1)).squeeze(1)
186
- next_q = self.target_network(next_states).max(1)[0]
187
- target_q = rewards + self.gamma * next_q * (1 - dones)
 
188
 
189
- loss = torch.nn.MSELoss()(current_q, target_q)
 
190
 
191
  self.optimizer.zero_grad()
192
  loss.backward()
 
 
 
193
  self.optimizer.step()
194
 
 
195
  self.steps += 1
196
  if self.steps % self.target_update == 0:
197
  self.target_network.load_state_dict(self.q_network.state_dict())
198
 
 
199
  self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
200
 
201
  return loss.item()
202
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  class TradingDemo:
 
 
 
 
 
204
  def __init__(self):
205
  self.config = TradingConfig()
206
  self.env = None
207
  self.agent = None
208
- self.device = 'cpu'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
- def initialize(self, balance, risk, asset):
211
- self.config.initial_balance = balance
212
- self.config.risk_level = risk
213
- self.config.asset_type = asset
214
- self.env = AdvancedTradingEnvironment(self.config)
215
- self.agent = DQNAgent(12, 4, self.config, self.device)
216
- return "โœ… Initialized!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
 
218
- def train(self, episodes):
219
- for ep in range(episodes):
 
 
 
 
 
220
  obs, _ = self.env.reset()
221
- total_reward = 0
222
- done = False
223
- while not done:
224
- action = self.agent.select_action(obs)
 
 
 
 
 
 
225
  next_obs, reward, done, _, info = self.env.step(action)
226
- self.agent.store_transition(obs, action, reward, next_obs, done)
 
 
 
 
 
 
 
227
  obs = next_obs
228
- total_reward += reward
229
- self.agent.update()
230
- yield f"Episode {ep+1}/{episodes} | Reward: {total_reward:.2f}", None
231
- yield "โœ… Training complete!", None
 
 
 
 
 
 
 
 
 
 
232
 
233
- def simulate(self, steps):
234
- obs, _ = self.env.reset()
235
- prices = []
236
- actions = []
237
- net_worths = []
238
- for _ in range(steps):
239
- action = self.agent.select_action(obs, training=False)
240
- next_obs, reward, done, _, info = self.env.step(action)
241
- prices.append(self.env.current_price)
242
- actions.append(action)
243
- net_worths.append(info['net_worth'])
244
- obs = next_obs
245
- if done:
246
- break
247
-
248
- import plotly.graph_objects as go
249
- fig = go.Figure()
250
- fig.add_trace(go.Scatter(y=prices, mode='lines', name='Price'))
251
- fig.add_trace(go.Scatter(y=net_worths, mode='lines', name='Net Worth'))
252
- return "โœ… Simulation complete!", fig
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
 
254
- demo = TradingDemo()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
 
256
- with gr.Blocks() as interface:
257
- gr.Markdown("# Trading AI Demo")
258
-
259
- with gr.Row():
260
- balance = gr.Slider(1000, 50000, 10000, label="Balance")
261
- risk = gr.Radio(["Low", "Medium", "High"], value="Medium", label="Risk")
262
- asset = gr.Radio(["Crypto", "Stock", "Forex"], value="Crypto", label="Asset")
263
- init_btn = gr.Button("Initialize")
264
-
265
- status = gr.Textbox(label="Status")
266
 
267
- episodes = gr.Number(value=50, label="Episodes")
268
- train_btn = gr.Button("Train")
269
- train_plot = gr.Plot()
270
-
271
- steps = gr.Number(value=100, label="Simulation Steps")
272
- sim_btn = gr.Button("Simulate")
273
- sim_plot = gr.Plot()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
 
275
- init_btn.click(demo.initialize, [balance, risk, asset], status)
276
- train_btn.click(demo.train, episodes, [status, train_plot])
277
- sim_btn.click(demo.simulate, steps, [status, sim_plot])
278
 
279
- interface.launch()
 
 
 
 
1
  import gradio as gr
2
  import numpy as np
3
  import torch
4
+ import torch.nn as nn
5
+ import torch.optim as optim
6
+ from collections import deque
7
+ import random
8
  from pathlib import Path
9
+ from typing import Dict, Tuple, Any, List
10
+ import plotly.graph_objects as go
11
+ from plotly.subplots import make_subplots
12
  import yaml
 
13
 
14
# Configuration class for trading parameters
class TradingConfig:
    """
    Configuration class for trading environment and agent parameters.
    Centralizes all configurable parameters for easy modification.
    """

    def __init__(self):
        # Environment parameters
        self.initial_balance = 10000.0
        self.max_steps = 1000
        self.transaction_cost = 0.001
        self.risk_level = "Medium"
        self.asset_type = "Crypto"

        # DQN agent parameters
        self.learning_rate = 0.0001
        self.gamma = 0.99  # Discount factor
        self.epsilon_start = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.9995
        self.batch_size = 32
        self.memory_size = 10000
        self.target_update = 100
        self.hidden_size = 128

        # Risk adjustment factors based on risk level
        self.risk_multipliers = {"Low": 0.5, "Medium": 1.0, "High": 2.0}
 
46
class AdvancedTradingEnvironment:
    """
    Trading environment simulating a single-asset market with synthetic
    prices, volumes and sentiment.

    Follows the gymnasium-style API: reset() -> (obs, info) and
    step(action) -> (obs, reward, terminated, truncated, info).
    Supports "Crypto", "Stock" and "Forex" asset profiles via different
    volatility settings, and scales trade sizing by the configured risk level.
    """

    def __init__(self, config: "TradingConfig"):
        self.config = config
        self.initial_balance = config.initial_balance
        self.balance = self.initial_balance
        self.position = 0.0  # units of the asset currently held
        self.current_price = 100.0
        self.step_count = 0
        self.max_steps = config.max_steps
        self.transaction_cost = config.transaction_cost

        # Market data history
        self.price_history = []
        self.volume_history = []
        self.sentiment_history = []

        # Risk adjustment (scales trade sizing and price volatility)
        self.risk_multiplier = config.risk_multipliers[config.risk_level]

        # Initialize synthetic market data
        self._initialize_market_data()

        # Action and observation space descriptions
        self.action_space = self._create_action_space()
        self.observation_space = self._create_observation_space()

        # Portfolio tracking
        self.portfolio_history = []
        self.action_history = []

    def _create_action_space(self) -> int:
        """Number of discrete trading actions."""
        return 4  # 0: Hold, 1: Buy, 2: Sell, 3: Close Position

    def _create_observation_space(self) -> Tuple:
        """Shape of the observation vector."""
        return (15,)

    def _initialize_market_data(self):
        """Generate synthetic price/volume/sentiment history for the asset."""
        n_points = 200  # longer history for better indicators

        # Different volatility based on asset type
        volatility_map = {
            "Crypto": 0.03,
            "Stock": 0.015,
            "Forex": 0.008
        }
        volatility = volatility_map.get(self.config.asset_type, 0.02)
        base_price = 100.0

        for i in range(n_points):
            # Price generation with a slow sinusoidal momentum component
            momentum = np.sin(i * 0.05) * 2
            noise = np.random.normal(0, volatility)
            price = max(10.0, base_price * (1 + momentum * 0.01 + noise))
            self.price_history.append(price)

            # Volume loosely correlated with distance from the base price
            volume = 1000 + abs(price - base_price) * 50 + np.random.normal(0, 200)
            self.volume_history.append(max(100, volume))

            # Sentiment with persistence (random walk from previous value)
            if i > 0:
                sentiment = self.sentiment_history[-1] + np.random.normal(0, 0.08)
            else:
                sentiment = 0.5 + np.random.normal(0, 0.1)
            self.sentiment_history.append(np.clip(sentiment, 0.0, 1.0))

        self.current_price = self.price_history[-1]

    def _calculate_technical_indicators(self) -> List[float]:
        """Compute six normalized technical indicators from recent history."""
        prices = np.array(self.price_history[-50:])

        if len(prices) < 2:
            return [0.0] * 6  # not enough data yet

        returns = np.diff(prices) / prices[:-1]

        # Simple moving averages (short/long window)
        sma_short = np.mean(prices[-10:]) if len(prices) >= 10 else prices[-1]
        sma_long = np.mean(prices[-20:]) if len(prices) >= 20 else prices[-1]

        # RSI (Relative Strength Index) over the last 14 returns
        if len(returns) >= 14:
            gains = returns[returns > 0]
            losses = -returns[returns < 0]
            avg_gain = np.mean(gains[-14:]) if len(gains) > 0 else 0.001
            avg_loss = np.mean(losses[-14:]) if len(losses) > 0 else 0.001
            rsi = 100 - (100 / (1 + avg_gain / avg_loss))
        else:
            rsi = 50.0

        # Annualized volatility of returns
        volatility = np.std(returns) * np.sqrt(252) if len(returns) > 1 else 0.1

        # 5-step price momentum
        momentum = (prices[-1] / prices[-5] - 1) if len(prices) >= 5 else 0.0

        # Short-term vs. longer-term volume trend
        volumes = np.array(self.volume_history[-10:])
        volume_trend = np.mean(volumes[-5:]) / np.mean(volumes[-10:]) - 1 if len(volumes) >= 10 else 0.0

        return [sma_short / 100, sma_long / 100, rsi / 100, volatility, momentum, volume_trend]

    def reset(self) -> Tuple[np.ndarray, Dict]:
        """Reset portfolio and market state; return (observation, info)."""
        self.balance = self.initial_balance
        self.position = 0.0
        self.step_count = 0
        self.portfolio_history = []
        self.action_history = []

        # Reinitialize market history from a fresh random starting point
        self.price_history = [100.0 + np.random.normal(0, 5)]
        self.volume_history = [1000 + np.random.normal(0, 200)]
        self.sentiment_history = [0.5 + np.random.normal(0, 0.1)]
        self.current_price = self.price_history[-1]

        obs = self._get_observation()
        info = self._get_info()

        return obs, info

    def step(self, action: int) -> Tuple[np.ndarray, float, bool, bool, Dict]:
        """Execute one trading step: update market, apply action, build transition."""
        self.step_count += 1

        # Generate new market data with more realistic dynamics
        self._update_market_data()

        # Execute trading action
        reward = self._execute_action(action)

        # Check termination conditions
        terminated = self.balance <= 0 or self.step_count >= self.max_steps
        truncated = False

        # Get new observation and info
        obs = self._get_observation()
        info = self._get_info()

        # Track portfolio value
        self.portfolio_history.append(info['net_worth'])
        self.action_history.append(action)

        return obs, reward, terminated, truncated, info

    def _update_market_data(self):
        """Generate the next price/volume/sentiment tick."""
        # Momentum from the last few returns (volatility clustering).
        # BUG FIX: the original evaluated `if prev_returns` on a multi-element
        # numpy array, which raises "truth value of an array is ambiguous"
        # as soon as the price history reaches 6 entries (crashing every
        # episode at ~step 6); use an explicit length check instead.
        if len(self.price_history) >= 6:
            recent = self.price_history[-5:]
            prev_returns = np.diff(recent) / np.array(recent[:-1])
        else:
            prev_returns = np.array([0.0])
        momentum = float(np.mean(prev_returns)) if len(prev_returns) > 0 else 0.0

        volatility_map = {
            "Crypto": 0.025,
            "Stock": 0.012,
            "Forex": 0.006
        }
        base_volatility = volatility_map.get(self.config.asset_type, 0.015)

        # Volatility scaling based on risk level
        volatility = base_volatility * self.risk_multiplier
        price_change = momentum * 0.3 + np.random.normal(0, volatility)

        self.current_price = max(10.0, self.current_price * (1 + price_change))
        self.price_history.append(self.current_price)

        # Volume follows the size of the price move, with noise
        volume_noise = np.random.normal(0, 200)
        new_volume = max(100, 1000 + abs(price_change) * 5000 + volume_noise)
        self.volume_history.append(new_volume)

        # Sentiment with mean reversion toward neutral (0.5)
        current_sentiment = self.sentiment_history[-1]
        sentiment_reversion = (0.5 - current_sentiment) * 0.1
        sentiment_noise = np.random.normal(0, 0.08)
        new_sentiment = current_sentiment + sentiment_reversion + sentiment_noise
        self.sentiment_history.append(np.clip(new_sentiment, 0.0, 1.0))

    def _execute_action(self, action: int) -> float:
        """Apply the trading action; return risk-adjusted reward (% of initial balance)."""
        prev_net_worth = self.balance + self.position * self.current_price
        trade_size_multiplier = 0.2 * self.risk_multiplier  # risk-adjusted position sizing

        if action == 1:  # Buy
            if self.balance > 0:
                trade_amount = min(self.balance * trade_size_multiplier, self.balance)
                cost = trade_amount * (1 + self.transaction_cost)
                if cost <= self.balance:
                    self.position += trade_amount / self.current_price
                    self.balance -= cost

        elif action == 2:  # Sell part of the position
            if self.position > 0:
                shares_to_sell = min(self.position * trade_size_multiplier, self.position)
                proceeds = shares_to_sell * self.current_price * (1 - self.transaction_cost)
                self.position -= shares_to_sell
                self.balance += proceeds

        elif action == 3:  # Close the whole position
            if self.position > 0:
                proceeds = self.position * self.current_price * (1 - self.transaction_cost)
                self.balance += proceeds
                self.position = 0

        # Reward is the step-over-step change in net worth, as % of initial balance
        new_net_worth = self.balance + self.position * self.current_price
        raw_reward = (new_net_worth - prev_net_worth) / self.initial_balance * 100

        # Penalize drawdowns deeper than 20% of the initial balance
        risk_penalty = 0.0
        if new_net_worth < self.initial_balance * 0.8:
            risk_penalty = (self.initial_balance - new_net_worth) / self.initial_balance * 10

        return raw_reward - risk_penalty

    def _get_observation(self) -> np.ndarray:
        """Build the 15-dimensional state vector."""
        # Price-based features
        recent_prices = self.price_history[-20:] if len(self.price_history) >= 20 else [self.current_price] * 20

        lo, hi = np.min(recent_prices), np.max(recent_prices)
        # BUG FIX: right after reset() the padded price window is constant,
        # so hi == lo and the original normalization divided by zero,
        # producing NaN observations; fall back to the midpoint instead.
        price_position = (self.current_price - lo) / (hi - lo) if hi > lo else 0.5

        price_features = [
            self.current_price / 100.0,
            np.mean(recent_prices) / 100.0,
            np.std(recent_prices) / 100.0,
            price_position,
        ]

        # Portfolio features
        portfolio_features = [
            self.balance / self.initial_balance,
            self.position * self.current_price / self.initial_balance,
            self.step_count / self.max_steps,
        ]

        # Sentiment features
        recent_sentiments = self.sentiment_history[-10:] if len(self.sentiment_history) >= 10 else [0.5] * 10
        sentiment_features = [
            np.mean(recent_sentiments),
            np.std(recent_sentiments),
            recent_sentiments[-1],  # latest sentiment
        ]

        # Technical indicators
        technical_features = self._calculate_technical_indicators()

        # NOTE: 4 + 3 + 3 + 6 = 16 features are computed, but the observation
        # is truncated to 15 (dropping the last technical indicator,
        # volume_trend) to match the declared observation space and the
        # agent's input dimension.
        all_features = price_features + portfolio_features + sentiment_features + technical_features
        return np.array(all_features[:15], dtype=np.float32)

    def _get_info(self) -> Dict[str, Any]:
        """Snapshot of portfolio metrics for logging/UI."""
        net_worth = self.balance + self.position * self.current_price
        return_total = (net_worth - self.initial_balance) / self.initial_balance * 100

        return {
            'net_worth': net_worth,
            'return_percent': return_total,
            'position_value': self.position * self.current_price,
            'cash_balance': self.balance,
            'current_price': self.current_price,
            'steps': self.step_count
        }
330
 
331
class DQNAgent:
    """
    Deep Q-Network agent for trading decisions.
    Uses an experience-replay buffer plus a periodically-synced target
    network for stable temporal-difference learning.
    """

    def __init__(self, state_dim: int, action_dim: int, config: "TradingConfig", device: str = 'cpu'):
        self.device = torch.device(device)
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.config = config

        # Online network and its frozen copy (same architecture).
        self.q_network = self._build_network(state_dim, action_dim)
        self.target_network = self._build_network(state_dim, action_dim)
        self.target_network.load_state_dict(self.q_network.state_dict())

        # Optimization setup
        self.optimizer = optim.Adam(self.q_network.parameters(), lr=config.learning_rate)
        self.criterion = nn.MSELoss()

        # Experience replay buffer
        self.memory = deque(maxlen=config.memory_size)

        # Epsilon-greedy exploration schedule
        self.epsilon = config.epsilon_start
        self.epsilon_min = config.epsilon_min
        self.epsilon_decay = config.epsilon_decay

        # Training bookkeeping
        self.batch_size = config.batch_size
        self.gamma = config.gamma
        self.target_update = config.target_update
        self.steps = 0

    def _build_network(self, state_dim: int, action_dim: int) -> nn.Sequential:
        """Three-hidden-layer MLP mapping state -> per-action Q-values."""
        hidden = self.config.hidden_size
        layers = [
            nn.Linear(state_dim, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden // 2),
            nn.ReLU(),
            nn.Linear(hidden // 2, action_dim),
        ]
        return nn.Sequential(*layers).to(self.device)

    def select_action(self, state: np.ndarray, training: bool = True) -> int:
        """Epsilon-greedy action choice; pure greedy when training=False."""
        if training and random.random() < self.epsilon:
            return random.randint(0, self.action_dim - 1)

        state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)

        with torch.no_grad():
            scores = self.q_network(state_tensor)
        return scores.argmax(1).item()

    def store_transition(self, state: np.ndarray, action: int, reward: float,
                         next_state: np.ndarray, done: bool):
        """Append one (s, a, r, s', done) tuple to the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def update(self) -> float:
        """One TD-learning step from a random minibatch; returns the loss."""
        if len(self.memory) < self.batch_size:
            return 0.0

        # Draw a random minibatch of transitions
        minibatch = random.sample(self.memory, self.batch_size)
        states, actions, rewards, next_states, dones = zip(*minibatch)

        # Move everything onto the training device
        states = torch.FloatTensor(np.array(states)).to(self.device)
        actions = torch.LongTensor(actions).to(self.device)
        rewards = torch.FloatTensor(rewards).to(self.device)
        next_states = torch.FloatTensor(np.array(next_states)).to(self.device)
        dones = torch.BoolTensor(dones).to(self.device)

        # Q(s, a) for the actions actually taken
        q_pred = self.q_network(states).gather(1, actions.unsqueeze(1)).squeeze(1)

        # Bootstrapped targets from the frozen network; terminal states
        # contribute only their immediate reward.
        with torch.no_grad():
            q_next = self.target_network(next_states).max(1)[0]
            q_target = rewards + self.gamma * q_next * (~dones)

        loss = self.criterion(q_pred, q_target)

        self.optimizer.zero_grad()
        loss.backward()

        # Clip gradients for training stability
        torch.nn.utils.clip_grad_norm_(self.q_network.parameters(), 1.0)
        self.optimizer.step()

        # Periodically sync the target network with the online one
        self.steps += 1
        if self.steps % self.target_update == 0:
            self.target_network.load_state_dict(self.q_network.state_dict())

        # Anneal exploration
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

        return loss.item()

    def save(self, path: str):
        """Persist networks, optimizer state and exploration progress."""
        torch.save({
            'q_network_state_dict': self.q_network.state_dict(),
            'target_network_state_dict': self.target_network.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'epsilon': self.epsilon,
            'steps': self.steps
        }, path)

    def load(self, path: str):
        """Restore a checkpoint produced by save()."""
        checkpoint = torch.load(path, map_location=self.device)
        self.q_network.load_state_dict(checkpoint['q_network_state_dict'])
        self.target_network.load_state_dict(checkpoint['target_network_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.epsilon = checkpoint['epsilon']
        self.steps = checkpoint['steps']
+
457
  class TradingDemo:
458
+ """
459
+ Main demonstration class integrating trading environment and DQN agent.
460
+ Provides interface for training, simulation, and visualization.
461
+ """
462
+
463
  def __init__(self):
464
  self.config = TradingConfig()
465
  self.env = None
466
  self.agent = None
467
+ self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
468
+ print(f"Using device: {self.device}")
469
+
470
+ # Training history
471
+ self.training_history = {
472
+ 'episode_rewards': [],
473
+ 'episode_losses': [],
474
+ 'epsilon_history': []
475
+ }
476
+
477
+ def initialize(self, balance: float, risk: str, asset: str) -> str:
478
+ """Initialize trading environment and agent"""
479
+ try:
480
+ self.config.initial_balance = float(balance)
481
+ self.config.risk_level = risk
482
+ self.config.asset_type = asset
483
+
484
+ # Create environment and agent
485
+ self.env = AdvancedTradingEnvironment(self.config)
486
+ self.agent = DQNAgent(15, 4, self.config, self.device)
487
+
488
+ # Reset training history
489
+ self.training_history = {
490
+ 'episode_rewards': [],
491
+ 'episode_losses': [],
492
+ 'epsilon_history': []
493
+ }
494
+
495
+ return f"โœ… System initialized! Balance: ${balance}, Risk: {risk}, Asset: {asset}"
496
+
497
+ except Exception as e:
498
+ return f"โŒ Initialization failed: {str(e)}"
499
 
500
+ def train(self, episodes: int):
501
+ """Train the DQN agent"""
502
+ if self.env is None or self.agent is None:
503
+ yield "โŒ Please initialize the system first!", None
504
+ return
505
+
506
+ try:
507
+ episodes = int(episodes)
508
+ for episode in range(episodes):
509
+ # Reset environment
510
+ obs, _ = self.env.reset()
511
+ total_reward = 0
512
+ episode_loss = 0
513
+ update_count = 0
514
+ done = False
515
+
516
+ while not done:
517
+ # Select and execute action
518
+ action = self.agent.select_action(obs)
519
+ next_obs, reward, done, _, info = self.env.step(action)
520
+
521
+ # Store experience and update
522
+ self.agent.store_transition(obs, action, reward, next_obs, done)
523
+ loss = self.agent.update()
524
+
525
+ if loss > 0:
526
+ episode_loss += loss
527
+ update_count += 1
528
+
529
+ total_reward += reward
530
+ obs = next_obs
531
+
532
+ # Calculate average loss
533
+ avg_loss = episode_loss / max(update_count, 1)
534
+
535
+ # Update history
536
+ self.training_history['episode_rewards'].append(total_reward)
537
+ self.training_history['episode_losses'].append(avg_loss)
538
+ self.training_history['epsilon_history'].append(self.agent.epsilon)
539
+
540
+ # Yield progress
541
+ progress = f"Episode {episode+1}/{episodes} | " \
542
+ f"Reward: {total_reward:.2f} | " \
543
+ f"Loss: {avg_loss:.4f} | " \
544
+ f"Epsilon: {self.agent.epsilon:.3f} | " \
545
+ f"Net Worth: ${info['net_worth']:.2f}"
546
+
547
+ # Create training plot every 10 episodes or at the end
548
+ if (episode + 1) % 10 == 0 or episode == episodes - 1:
549
+ plot = self._create_training_plot()
550
+ yield progress, plot
551
+ else:
552
+ yield progress, None
553
+
554
+ yield "โœ… Training completed successfully!", self._create_training_plot()
555
+
556
+ except Exception as e:
557
+ yield f"โŒ Training error: {str(e)}", None
558
 
559
+ def simulate(self, steps: int):
560
+ """Run trading simulation with current policy"""
561
+ if self.env is None or self.agent is None:
562
+ return "โŒ Please initialize and train the system first!", None
563
+
564
+ try:
565
+ steps = int(steps)
566
  obs, _ = self.env.reset()
567
+
568
+ # Tracking data
569
+ prices = []
570
+ actions = []
571
+ net_worths = []
572
+ portfolio_values = []
573
+ cash_balances = []
574
+
575
+ for step in range(steps):
576
+ action = self.agent.select_action(obs, training=False)
577
  next_obs, reward, done, _, info = self.env.step(action)
578
+
579
+ # Track metrics
580
+ prices.append(self.env.current_price)
581
+ actions.append(action)
582
+ net_worths.append(info['net_worth'])
583
+ portfolio_values.append(info['position_value'])
584
+ cash_balances.append(info['cash_balance'])
585
+
586
  obs = next_obs
587
+ if done:
588
+ break
589
+
590
+ # Create comprehensive visualization
591
+ fig = self._create_simulation_plot(prices, actions, net_worths, portfolio_values, cash_balances)
592
+
593
+ final_return = (net_worths[-1] - self.config.initial_balance) / self.config.initial_balance * 100
594
+ result_text = f"โœ… Simulation completed! Final Return: {final_return:.2f}% | " \
595
+ f"Final Net Worth: ${net_worths[-1]:.2f}"
596
+
597
+ return result_text, fig
598
+
599
+ except Exception as e:
600
+ return f"โŒ Simulation error: {str(e)}", None
601
 
602
+ def _create_training_plot(self):
603
+ """Create training progress visualization"""
604
+ if not self.training_history['episode_rewards']:
605
+ return None
606
+
607
+ episodes = list(range(1, len(self.training_history['episode_rewards']) + 1))
608
+
609
+ fig = make_subplots(rows=2, cols=2,
610
+ subplot_titles=('Episode Rewards', 'Training Loss',
611
+ 'Epsilon Decay', 'Moving Average Reward'),
612
+ vertical_spacing=0.12)
613
+
614
+ # Rewards
615
+ fig.add_trace(
616
+ go.Scatter(x=episodes, y=self.training_history['episode_rewards'],
617
+ mode='lines', name='Reward', line=dict(color='blue')),
618
+ row=1, col=1
619
+ )
620
+
621
+ # Loss
622
+ fig.add_trace(
623
+ go.Scatter(x=episodes, y=self.training_history['episode_losses'],
624
+ mode='lines', name='Loss', line=dict(color='red')),
625
+ row=1, col=2
626
+ )
627
+
628
+ # Epsilon
629
+ fig.add_trace(
630
+ go.Scatter(x=episodes, y=self.training_history['epsilon_history'],
631
+ mode='lines', name='Epsilon', line=dict(color='green')),
632
+ row=2, col=1
633
+ )
634
+
635
+ # Moving average reward
636
+ window = min(20, len(episodes))
637
+ moving_avg = [np.mean(self.training_history['episode_rewards'][max(0, i-window):i+1])
638
+ for i in range(len(episodes))]
639
+ fig.add_trace(
640
+ go.Scatter(x=episodes, y=moving_avg,
641
+ mode='lines', name='MA Reward', line=dict(color='orange', width=2)),
642
+ row=2, col=2
643
+ )
644
+
645
+ fig.update_layout(height=600, showlegend=True, title_text="Training Progress")
646
+ return fig
647
 
648
+ def _create_simulation_plot(self, prices, actions, net_worths, portfolio_values, cash_balances):
649
+ """Create comprehensive simulation results visualization"""
650
+ fig = make_subplots(rows=2, cols=2,
651
+ subplot_titles=('Price & Actions', 'Portfolio Performance',
652
+ 'Portfolio Composition', 'Action Distribution'),
653
+ vertical_spacing=0.12,
654
+ horizontal_spacing=0.1)
655
+
656
+ steps = list(range(len(prices)))
657
+
658
+ # Price and actions
659
+ fig.add_trace(
660
+ go.Scatter(x=steps, y=prices, mode='lines', name='Price', line=dict(color='blue')),
661
+ row=1, col=1
662
+ )
663
+
664
+ # Add action markers
665
+ action_colors = ['gray', 'green', 'red', 'orange'] # Hold, Buy, Sell, Close
666
+ action_names = ['Hold', 'Buy', 'Sell', 'Close']
667
+ for action in range(4):
668
+ action_indices = [i for i, a in enumerate(actions) if a == action]
669
+ if action_indices:
670
+ action_prices = [prices[i] for i in action_indices]
671
+ fig.add_trace(
672
+ go.Scatter(x=action_indices, y=action_prices,
673
+ mode='markers', name=action_names[action],
674
+ marker=dict(color=action_colors[action], size=8)),
675
+ row=1, col=1
676
+ )
677
+
678
+ # Portfolio performance
679
+ initial_balance = self.config.initial_balance
680
+ returns = [(nw - initial_balance) / initial_balance * 100 for nw in net_worths]
681
+
682
+ fig.add_trace(
683
+ go.Scatter(x=steps, y=net_worths, mode='lines', name='Net Worth', line=dict(color='purple')),
684
+ row=1, col=2
685
+ )
686
+ fig.add_trace(
687
+ go.Scatter(x=steps, y=returns, mode='lines', name='Return %', line=dict(color='orange'), yaxis='y2'),
688
+ row=1, col=2
689
+ )
690
+
691
+ # Portfolio composition
692
+ fig.add_trace(
693
+ go.Scatter(x=steps, y=portfolio_values, mode='lines', name='Portfolio Value', line=dict(color='green')),
694
+ row=2, col=1
695
+ )
696
+ fig.add_trace(
697
+ go.Scatter(x=steps, y=cash_balances, mode='lines', name='Cash Balance', line=dict(color='blue')),
698
+ row=2, col=1
699
+ )
700
+
701
+ # Action distribution
702
+ action_counts = [actions.count(i) for i in range(4)]
703
+ fig.add_trace(
704
+ go.Bar(x=action_names, y=action_counts,
705
+ marker_color=action_colors, name='Action Count'),
706
+ row=2, col=2
707
+ )
708
+
709
+ # Update layout
710
+ fig.update_layout(height=700, showlegend=True, title_text="Trading Simulation Results")
711
+ fig.update_yaxes(title_text="Return (%)", row=1, col=2, secondary_y=True)
712
+ fig.update_yaxes(title_text="Value ($)", row=1, col=2, secondary_y=False)
713
+
714
+ return fig
715
 
716
+ # Create and launch Gradio interface
717
def create_interface():
    """Create Gradio interface for the trading demo.

    Builds a Blocks UI with three sections -- configuration, training and
    simulation -- all wired to one shared TradingDemo instance, and returns
    the (not yet launched) interface.
    """
    # Single demo instance shared by every event handler below, so training
    # state persists between the Train and Simulate buttons.
    demo = TradingDemo()

    with gr.Blocks(theme=gr.themes.Soft(), title="AI Trading Demo") as interface:
        gr.Markdown("""
        # ๐Ÿค– Advanced AI Trading Demo
        **Deep Reinforcement Learning for Financial Markets**
        
        This demo shows a DQN agent learning to trade in simulated financial markets.
        The agent learns optimal trading strategies through reinforcement learning.
        """)

        # --- Configuration panel (left) and status readout (right) ---------
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("## ๐ŸŽฏ Configuration")

                balance = gr.Slider(1000, 50000, 10000, step=1000, label="Initial Balance ($)")
                risk = gr.Radio(["Low", "Medium", "High"], value="Medium", label="Risk Level")
                asset = gr.Radio(["Crypto", "Stock", "Forex"], value="Crypto", label="Asset Type")
                init_btn = gr.Button("๐Ÿš€ Initialize System", variant="primary")

            with gr.Column(scale=2):
                gr.Markdown("## ๐Ÿ“Š System Status")
                status = gr.Textbox(label="Status", value="Click 'Initialize System' to start", interactive=False)

        # --- Training (left) and simulation (right) controls + plots -------
        with gr.Row():
            with gr.Column():
                gr.Markdown("## ๐Ÿ‹๏ธ Training")
                episodes = gr.Number(value=100, label="Training Episodes", precision=0)
                train_btn = gr.Button("๐ŸŽฏ Start Training", variant="primary")
                train_plot = gr.Plot(label="Training Progress")

            with gr.Column():
                gr.Markdown("## ๐Ÿ“ˆ Simulation")
                steps = gr.Number(value=200, label="Simulation Steps", precision=0)
                sim_btn = gr.Button("โ–ถ๏ธ Run Simulation", variant="primary")
                sim_plot = gr.Plot(label="Simulation Results")

        # Event handlers: each button routes its inputs into the shared demo
        # and writes back into the status box (and plot, where applicable).
        # NOTE(review): demo.train appears to be a generator, so the train
        # click streams progress updates -- confirm against TradingDemo.
        init_btn.click(
            demo.initialize,
            inputs=[balance, risk, asset],
            outputs=status
        )

        train_btn.click(
            demo.train,
            inputs=episodes,
            outputs=[status, train_plot]
        )

        sim_btn.click(
            demo.simulate,
            inputs=steps,
            outputs=[status, sim_plot]
        )

        gr.Markdown("""
        ## ๐Ÿ“– How to Use:
        1. **Configure**: Set your initial balance, risk level, and asset type
        2. **Initialize**: Click 'Initialize System' to set up the trading environment
        3. **Train**: Start training the AI agent (recommended: 100+ episodes)
        4. **Simulate**: Run a trading simulation to see the trained agent in action
        
        ## ๐ŸŽฎ Actions:
        - **0: Hold** - Maintain current position
        - **1: Buy** - Purchase asset (20% of balance)
        - **2: Sell** - Sell portion of position (20%)
        - **3: Close** - Liquidate entire position
        """)

    return interface
 
 
790
 
791
# Launch the application
if __name__ == "__main__":
    interface = create_interface()
    # share=True requests a public gradio.live URL; binding 0.0.0.0 makes
    # the server reachable from outside a container, and 7860 is the
    # conventional Gradio/HF Spaces port.
    interface.launch(share=True, server_name="0.0.0.0", server_port=7860)