Spaces:

OmidSakaki
/

VisualTradingAI

Sleeping

App Files Files Community

OmidSakaki committed on Oct 16, 2025

Commit

92a27f9

verified ·

1 Parent(s): 093afbf

Update app.py

Browse files

Files changed (1) hide show

app.py +498 -9

app.py CHANGED Viewed

@@ -5,19 +5,513 @@ import torch.nn as nn
 import torch.optim as optim
 from collections import deque
 import random
-from typing import Dict, Tuple, Any, List, Optional
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
-# (تمام کلاس‌های TradingConfig, AdvancedTradingEnvironment, DQNAgent, TradingDemo عیناً حفظ شده‌اند...)
-# این تابع همانند قبل است
 def create_interface():
     demo = TradingDemo()
     with gr.Blocks(theme=gr.themes.Soft(), title="AI Trading Demo") as interface:
         gr.Markdown("""
         # 🤖 Advanced AI Trading Demo
         **Deep Reinforcement Learning for Financial Markets**
         This demo shows a DQN agent learning to trade in simulated financial markets.
         The agent learns optimal trading strategies through reinforcement learning.
         """)
@@ -29,7 +523,6 @@ def create_interface():
                 risk = gr.Radio(["Low", "Medium", "High"], value="Medium", label="Risk Level")
                 asset = gr.Radio(["Crypto", "Stock", "Forex"], value="Crypto", label="Asset Type")
                 init_btn = gr.Button("🚀 Initialize System", variant="primary")
             with gr.Column(scale=2):
                 gr.Markdown("## 📊 System Status")
                 status = gr.Textbox(label="Status", value="Click 'Initialize System' to start", interactive=False)
@@ -40,26 +533,22 @@ def create_interface():
                 episodes = gr.Number(value=100, label="Training Episodes", precision=0)
                 train_btn = gr.Button("🎯 Start Training", variant="primary")
                 train_plot = gr.Plot(label="Training Progress")
             with gr.Column():
                 gr.Markdown("## 📈 Simulation")
                 steps = gr.Number(value=200, label="Simulation Steps", precision=0)
                 sim_btn = gr.Button("▶️ Run Simulation", variant="primary")
                 sim_plot = gr.Plot(label="Simulation Results")
-        # Event handlers
         init_btn.click(
             demo.initialize,
             inputs=[balance, risk, asset],
             outputs=status
         )
         train_btn.click(
             demo.train,
             inputs=episodes,
             outputs=[status, train_plot]
         )
         sim_btn.click(
             demo.simulate,
             inputs=steps,
@@ -72,6 +561,7 @@ def create_interface():
         2. **Initialize**: Click 'Initialize System' to set up the trading environment
         3. **Train**: Start training the AI agent (recommended: 100+ episodes)
         4. **Simulate**: Run a trading simulation to see the trained agent in action
         ## 🎮 Actions:
         - **0: Hold** - Maintain current position
         - **1: Buy** - Purchase asset (20% of balance)
@@ -80,5 +570,4 @@ def create_interface():
         """)
     return interface
-# نکته مهم: فقط این خط باید اجرا شود و نام متغیر باید demo باشد
 demo = create_interface()

 import torch.optim as optim
 from collections import deque
 import random
+from typing import Dict, Tuple, Any, List
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
+# ==== 1. Configuration Class ====
class TradingConfig:
    """
    Default settings shared by the trading environment and the DQN agent.

    Every value is stored as a plain instance attribute, so callers can
    overwrite any of them after construction.
    """

    def __init__(self):
        # Defaults read by the market simulation
        environment_defaults = {
            "initial_balance": 10000.0,
            "max_steps": 1000,
            "transaction_cost": 0.001,
            "risk_level": "Medium",
            "asset_type": "Crypto",
        }
        # Defaults read by the DQN agent
        agent_defaults = {
            "learning_rate": 0.0001,
            "gamma": 0.99,
            "epsilon_start": 1.0,
            "epsilon_min": 0.01,
            "epsilon_decay": 0.9995,
            "batch_size": 32,
            "memory_size": 10000,
            "target_update": 100,
            "hidden_size": 128,
        }
        for attribute, default in {**environment_defaults, **agent_defaults}.items():
            setattr(self, attribute, default)

        # Scaling factor looked up by risk level name; a fresh dict per instance
        self.risk_multipliers = dict(Low=0.5, Medium=1.0, High=2.0)
+# ==== 2. Trading Environment ====
class AdvancedTradingEnvironment:
    """
    Simulates a financial market with synthetic data, multi-asset support,
    and technical/sentiment indicators.

    ``reset()`` returns ``(observation, info)`` and ``step(action)`` returns
    ``(observation, reward, terminated, truncated, info)``.

    Actions: 0 = Hold, 1 = Buy, 2 = Sell, 3 = Close.
    Observations are float32 vectors of length 15.
    """

    def __init__(self, config: "TradingConfig"):
        """
        Build the environment from ``config``.

        Reads ``initial_balance``, ``max_steps``, ``transaction_cost``,
        ``risk_level``, ``asset_type`` and ``risk_multipliers`` from it.

        Raises:
            KeyError: if ``config.risk_level`` is not a key of
                ``config.risk_multipliers``.
        """
        self.config = config
        self.initial_balance = config.initial_balance
        self.balance = self.initial_balance
        self.position = 0.0
        self.current_price = 100.0
        self.step_count = 0
        self.max_steps = config.max_steps
        self.transaction_cost = config.transaction_cost
        # Market data
        self.price_history = []
        self.volume_history = []
        self.sentiment_history = []
        # Risk multiplier scales both simulated volatility and trade size
        self.risk_multiplier = config.risk_multipliers[config.risk_level]
        self._initialize_market_data()
        self.action_space = 4  # Hold, Buy, Sell, Close
        self.observation_space = (15,)
        # For plotting
        self.portfolio_history = []
        self.action_history = []

    def _initialize_market_data(self):
        """Seed the price, volume and sentiment histories with 200 synthetic points."""
        n_points = 200
        volatility_map = {"Crypto": 0.03, "Stock": 0.015, "Forex": 0.008}
        volatility = volatility_map.get(self.config.asset_type, 0.02)
        base_price = 100.0
        for i in range(n_points):
            # Slow sine wave around the base price plus Gaussian noise
            momentum = np.sin(i * 0.05) * 2
            noise = np.random.normal(0, volatility)
            price = base_price * (1 + momentum * 0.01 + noise)
            price = max(10.0, price)
            self.price_history.append(price)
            volume = 1000 + abs(price - base_price) * 50 + np.random.normal(0, 200)
            self.volume_history.append(max(100, volume))
            if i > 0:
                # Sentiment follows a random walk clipped to [0, 1]
                prev_sentiment = self.sentiment_history[-1]
                sentiment_change = np.random.normal(0, 0.08)
                sentiment = prev_sentiment + sentiment_change
            else:
                sentiment = 0.5 + np.random.normal(0, 0.1)
            self.sentiment_history.append(np.clip(sentiment, 0.0, 1.0))
        self.current_price = self.price_history[-1]

    def _calculate_technical_indicators(self) -> List[float]:
        """
        Compute six indicators from the last 50 prices and last 10 volumes.

        Returns:
            ``[sma_short/100, sma_long/100, rsi/100, volatility, momentum,
            volume_trend]``, or six zeros when fewer than two prices exist.
        """
        prices = np.array(self.price_history[-50:])
        if len(prices) < 2:
            return [0.0] * 6
        returns = np.diff(prices) / prices[:-1]
        sma_short = np.mean(prices[-10:]) if len(prices) >= 10 else prices[-1]
        sma_long = np.mean(prices[-20:]) if len(prices) >= 20 else prices[-1]
        if len(returns) >= 14:
            # RSI-style ratio over the last 14 positive / negative returns
            # found in the window; 0.001 stands in when one side is empty.
            gains = returns[returns > 0]
            losses = -returns[returns < 0]
            avg_gain = np.mean(gains[-14:]) if len(gains) > 0 else 0.001
            avg_loss = np.mean(losses[-14:]) if len(losses) > 0 else 0.001
            rsi = 100 - (100 / (1 + avg_gain / avg_loss))
        else:
            rsi = 50.0
        volatility = np.std(returns) * np.sqrt(252) if len(returns) > 1 else 0.1
        momentum = (prices[-1] / prices[-5] - 1) if len(prices) >= 5 else 0.0
        volumes = np.array(self.volume_history[-10:])
        volume_trend = np.mean(volumes[-5:]) / np.mean(volumes[-10:]) - 1 if len(volumes) >= 10 else 0.0
        return [sma_short/100, sma_long/100, rsi/100, volatility, momentum, volume_trend]

    def reset(self) -> Tuple[np.ndarray, Dict]:
        """
        Start a new episode with a fresh portfolio and one-point histories.

        Returns:
            ``(observation, info)`` for the initial state.
        """
        self.balance = self.initial_balance
        self.position = 0.0
        self.step_count = 0
        self.portfolio_history = []
        self.action_history = []
        self.price_history = [100.0 + np.random.normal(0, 5)]
        self.volume_history = [1000 + np.random.normal(0, 200)]
        self.sentiment_history = [0.5 + np.random.normal(0, 0.1)]
        self.current_price = self.price_history[-1]
        obs = self._get_observation()
        info = self._get_info()
        return obs, info

    def step(self, action: int) -> Tuple[np.ndarray, float, bool, bool, Dict]:
        """
        Advance the market one tick and apply ``action`` at the new price.

        The reward compares net worth after the trade with net worth *before*
        the price moved, so it includes the gain or loss of the position held
        over the tick as well as transaction costs.

        Returns:
            ``(observation, reward, terminated, truncated, info)``;
            ``truncated`` is always ``False`` and reaching ``max_steps`` is
            reported through ``terminated``.
        """
        self.step_count += 1
        # Snapshot taken before the price update; measuring after it would
        # hide every market move from the reward.
        prev_net_worth = self.balance + self.position * self.current_price
        self._update_market_data()
        reward = self._execute_action(action, prev_net_worth)
        terminated = bool(self.balance <= 0 or self.step_count >= self.max_steps)
        truncated = False
        obs = self._get_observation()
        info = self._get_info()
        self.portfolio_history.append(info['net_worth'])
        self.action_history.append(action)
        return obs, reward, terminated, truncated, info

    def _update_market_data(self):
        """Append one new synthetic price, volume and sentiment sample."""
        if len(self.price_history) >= 6:
            # Mean of the last four simple returns. Reduced to a float right
            # away: testing the truth value of a multi-element array raises.
            window = np.asarray(self.price_history[-5:], dtype=float)
            momentum = float(np.mean(np.diff(window) / window[:-1]))
        else:
            momentum = 0.0
        volatility_map = {"Crypto": 0.025, "Stock": 0.012, "Forex": 0.006}
        base_volatility = volatility_map.get(self.config.asset_type, 0.015)
        volatility = base_volatility * self.risk_multiplier
        price_change = momentum * 0.3 + np.random.normal(0, volatility)
        self.current_price = max(10.0, self.current_price * (1 + price_change))
        self.price_history.append(self.current_price)
        base_volume = 1000
        volume_noise = np.random.normal(0, 200)
        new_volume = max(100, base_volume + abs(price_change) * 5000 + volume_noise)
        self.volume_history.append(new_volume)
        # Sentiment drifts back toward 0.5 with added noise, clipped to [0, 1]
        current_sentiment = self.sentiment_history[-1]
        sentiment_reversion = (0.5 - current_sentiment) * 0.1
        sentiment_noise = np.random.normal(0, 0.08)
        new_sentiment = current_sentiment + sentiment_reversion + sentiment_noise
        self.sentiment_history.append(np.clip(new_sentiment, 0.0, 1.0))

    def _execute_action(self, action: int, prev_net_worth: float = None) -> float:
        """
        Apply a trade at ``current_price`` and return the resulting reward.

        Args:
            action: 0 = Hold, 1 = Buy, 2 = Sell, 3 = Close.
            prev_net_worth: baseline net worth for the reward. When omitted,
                the net worth immediately before the trade is used, so the
                reward then reflects transaction costs only.

        Returns:
            Net-worth change as a percentage of the initial balance, minus a
            penalty once net worth falls below 80% of the initial balance.
        """
        if prev_net_worth is None:
            prev_net_worth = self.balance + self.position * self.current_price
        trade_size_multiplier = 0.2 * self.risk_multiplier
        if action == 1:  # Buy
            if self.balance > 0:
                trade_amount = min(self.balance * trade_size_multiplier, self.balance)
                cost = trade_amount * (1 + self.transaction_cost)
                # Skip the order when the fee would push cost above cash
                if cost <= self.balance:
                    shares_bought = trade_amount / self.current_price
                    self.position += shares_bought
                    self.balance -= cost
        elif action == 2:  # Sell
            if self.position > 0:
                sell_fraction = trade_size_multiplier
                shares_to_sell = min(self.position * sell_fraction, self.position)
                proceeds = shares_to_sell * self.current_price * (1 - self.transaction_cost)
                self.position -= shares_to_sell
                self.balance += proceeds
        elif action == 3:  # Close
            if self.position > 0:
                proceeds = self.position * self.current_price * (1 - self.transaction_cost)
                self.balance += proceeds
                self.position = 0.0
        new_net_worth = self.balance + self.position * self.current_price
        raw_reward = (new_net_worth - prev_net_worth) / self.initial_balance * 100
        risk_penalty = 0.0
        if new_net_worth < self.initial_balance * 0.8:
            risk_penalty = (self.initial_balance - new_net_worth) / self.initial_balance * 10
        final_reward = raw_reward - risk_penalty
        return final_reward

    def _get_observation(self) -> np.ndarray:
        """
        Build the float32 observation vector of length 15.

        Layout: 4 price features, 3 portfolio features, 3 sentiment features,
        then technical indicators. Sixteen features are assembled and the
        result is cut to 15, so the last indicator (volume trend) is dropped.
        """
        recent_prices = self.price_history[-20:] if len(self.price_history) >= 20 else [self.current_price] * 20
        lowest = np.min(recent_prices)
        price_range = np.max(recent_prices) - lowest
        # A flat window (always the case before 20 prices exist) has zero
        # range; use the midpoint instead of dividing by zero into NaN.
        relative_position = (self.current_price - lowest) / price_range if price_range > 0 else 0.5
        price_features = [
            self.current_price / 100.0,
            np.mean(recent_prices) / 100.0,
            np.std(recent_prices) / 100.0,
            relative_position
        ]
        portfolio_features = [
            self.balance / self.initial_balance,
            self.position * self.current_price / self.initial_balance,
            self.step_count / self.max_steps
        ]
        recent_sentiments = self.sentiment_history[-10:] if len(self.sentiment_history) >= 10 else [0.5] * 10
        sentiment_features = [
            np.mean(recent_sentiments),
            np.std(recent_sentiments),
            recent_sentiments[-1]
        ]
        technical_features = self._calculate_technical_indicators()
        all_features = price_features + portfolio_features + sentiment_features + technical_features
        observation = np.array(all_features[:15], dtype=np.float32)
        return observation

    def _get_info(self) -> Dict[str, Any]:
        """Return a snapshot of net worth, return %, position value, cash, price and step."""
        net_worth = self.balance + self.position * self.current_price
        return_total = (net_worth - self.initial_balance) / self.initial_balance * 100
        return {
            'net_worth': net_worth,
            'return_percent': return_total,
            'position_value': self.position * self.current_price,
            'cash_balance': self.balance,
            'current_price': self.current_price,
            'steps': self.step_count
        }
+# ==== 3. DQN Agent ====
class DQNAgent:
    """
    Epsilon-greedy DQN learner for trading, with a replay buffer and a
    target network that is periodically synchronised with the online one.
    """

    def __init__(self, state_dim: int, action_dim: int, config: "TradingConfig", device: str = 'cpu'):
        """Create both networks, the optimizer, the replay buffer and the schedule values from ``config``."""
        self.config = config
        self.device = torch.device(device)
        self.state_dim, self.action_dim = state_dim, action_dim

        # Online network is built first, then the target, which copies its weights
        self.q_network = self._build_network(state_dim, action_dim)
        self.target_network = self._build_network(state_dim, action_dim)
        self.target_network.load_state_dict(self.q_network.state_dict())

        self.optimizer = optim.Adam(self.q_network.parameters(), lr=config.learning_rate)
        self.criterion = nn.MSELoss()
        self.memory = deque(maxlen=config.memory_size)

        # Exploration schedule and learning hyper-parameters
        self.epsilon = config.epsilon_start
        self.epsilon_min = config.epsilon_min
        self.epsilon_decay = config.epsilon_decay
        self.batch_size = config.batch_size
        self.gamma = config.gamma
        self.target_update = config.target_update
        self.steps = 0

    def _build_network(self, state_dim: int, action_dim: int) -> nn.Sequential:
        """Return an MLP (three ReLU hidden layers, linear output) placed on ``self.device``."""
        hidden = self.config.hidden_size
        widths = [state_dim, hidden, hidden, hidden // 2]
        layers = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(widths[-1], action_dim))
        return nn.Sequential(*layers).to(self.device)

    def select_action(self, state: np.ndarray, training: bool = True) -> int:
        """Pick a random action with probability epsilon while training, otherwise the greedy one."""
        explore = training and random.random() < self.epsilon
        if explore:
            return random.randint(0, self.action_dim - 1)
        batch_of_one = torch.FloatTensor(state).unsqueeze(0).to(self.device)
        with torch.no_grad():
            scores = self.q_network(batch_of_one)
        return scores.argmax(1).item()

    def store_transition(self, state: np.ndarray, action: int, reward: float, next_state: np.ndarray, done: bool):
        """Append one transition tuple to the replay buffer."""
        transition = (state, action, reward, next_state, done)
        self.memory.append(transition)

    def update(self) -> float:
        """
        Run one gradient step on a random minibatch.

        Returns:
            The loss value, or ``0.0`` when the buffer holds fewer than
            ``batch_size`` transitions (no step is taken in that case).
        """
        if len(self.memory) < self.batch_size:
            return 0.0

        minibatch = random.sample(self.memory, self.batch_size)
        state_col = [t[0] for t in minibatch]
        action_col = [t[1] for t in minibatch]
        reward_col = [t[2] for t in minibatch]
        next_col = [t[3] for t in minibatch]
        done_col = [t[4] for t in minibatch]

        states = torch.FloatTensor(np.array(state_col)).to(self.device)
        actions = torch.LongTensor(action_col).to(self.device)
        rewards = torch.FloatTensor(reward_col).to(self.device)
        next_states = torch.FloatTensor(np.array(next_col)).to(self.device)
        dones = torch.BoolTensor(done_col).to(self.device)

        # Q-value of the action actually taken in each sampled state
        all_q = self.q_network(states)
        chosen_q = all_q.gather(1, actions.unsqueeze(1)).squeeze(1)

        with torch.no_grad():
            best_next_q = self.target_network(next_states).max(1)[0]
            not_done = (~dones).float()
            # No bootstrapping from transitions flagged as done
            targets = rewards + self.gamma * best_next_q * not_done

        loss = self.criterion(chosen_q, targets)
        self.optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.q_network.parameters(), 1.0)
        self.optimizer.step()

        self.steps += 1
        sync_due = self.steps % self.target_update == 0
        if sync_due:
            self.target_network.load_state_dict(self.q_network.state_dict())

        # Epsilon decays once per update and is floored at epsilon_min
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
        return loss.item()

    def save(self, path: str):
        """Write both networks, the optimizer state, epsilon and the step counter to ``path``."""
        checkpoint = {
            'q_network_state_dict': self.q_network.state_dict(),
            'target_network_state_dict': self.target_network.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'epsilon': self.epsilon,
            'steps': self.steps
        }
        torch.save(checkpoint, path)

    def load(self, path: str):
        """Restore the state written by ``save`` from ``path``."""
        checkpoint = torch.load(path, map_location=self.device)
        for module, key in (
            (self.q_network, 'q_network_state_dict'),
            (self.target_network, 'target_network_state_dict'),
            (self.optimizer, 'optimizer_state_dict'),
        ):
            module.load_state_dict(checkpoint[key])
        self.epsilon = checkpoint['epsilon']
        self.steps = checkpoint['steps']
+# ==== 4. Main Application ====
class TradingDemo:
    """
    Main class integrating environment and agent, with training/simulation and plots.

    ``initialize`` builds the environment and a DQN agent with state size 15
    and 4 actions; ``train`` is a generator that streams progress;
    ``simulate`` runs the greedy policy and returns a summary with a figure.
    """

    def __init__(self):
        self.config = TradingConfig()
        self.env = None
        self.agent = None
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.training_history = {
            'episode_rewards': [],
            'episode_losses': [],
            'epsilon_history': []
        }

    def initialize(self, balance: float, risk: str, asset: str) -> str:
        """
        Create a fresh environment and agent and clear the training history.

        Args:
            balance: starting cash; must be a finite number greater than zero
                because the environment divides by it.
            risk: risk level name stored on the config.
            asset: asset type name stored on the config.

        Returns:
            A status message; failures are reported in the message rather
            than raised.
        """
        try:
            balance_value = float(balance)
            # Validate before touching the config so a bad value leaves it intact
            if not (np.isfinite(balance_value) and balance_value > 0):
                raise ValueError("initial balance must be a positive number")
            self.config.initial_balance = balance_value
            self.config.risk_level = risk
            self.config.asset_type = asset
            self.env = AdvancedTradingEnvironment(self.config)
            self.agent = DQNAgent(15, 4, self.config, self.device)
            self.training_history = {
                'episode_rewards': [],
                'episode_losses': [],
                'epsilon_history': []
            }
            return f"✅ System initialized! Balance: ${balance}, Risk: {risk}, Asset: {asset}"
        except Exception as e:
            return f"❌ Initialization failed: {str(e)}"

    def train(self, episodes: int):
        """
        Train the agent, yielding ``(status_text, figure)`` after every episode.

        The figure is rebuilt every 10th episode and on the last one; in
        between, the most recent figure is yielded again, since yielding
        ``None`` would blank the plot output.
        """
        if self.env is None or self.agent is None:
            yield "❌ Please initialize the system first!", None
            return
        try:
            episodes = int(episodes)
            # Start from the existing history (None when nothing was trained yet)
            latest_plot = self._create_training_plot()
            for episode in range(episodes):
                obs, _ = self.env.reset()
                total_reward = 0
                episode_loss = 0
                update_count = 0
                done = False
                while not done:
                    action = self.agent.select_action(obs)
                    next_obs, reward, done, _, info = self.env.step(action)
                    self.agent.store_transition(obs, action, reward, next_obs, done)
                    loss = self.agent.update()
                    # update() returns 0.0 until the replay buffer is large enough
                    if loss > 0:
                        episode_loss += loss
                        update_count += 1
                    total_reward += reward
                    obs = next_obs
                avg_loss = episode_loss / max(update_count, 1)
                self.training_history['episode_rewards'].append(total_reward)
                self.training_history['episode_losses'].append(avg_loss)
                self.training_history['epsilon_history'].append(self.agent.epsilon)
                progress = f"Episode {episode+1}/{episodes} | " \
                         f"Reward: {total_reward:.2f} | " \
                         f"Loss: {avg_loss:.4f} | " \
                         f"Epsilon: {self.agent.epsilon:.3f} | " \
                         f"Net Worth: ${info['net_worth']:.2f}"
                if (episode + 1) % 10 == 0 or episode == episodes - 1:
                    latest_plot = self._create_training_plot()
                yield progress, latest_plot
            yield "✅ Training completed successfully!", self._create_training_plot()
        except Exception as e:
            yield f"❌ Training error: {str(e)}", None

    def simulate(self, steps: int):
        """
        Run the greedy policy for up to ``steps`` steps on a reset environment.

        Returns:
            ``(status_text, figure)``; the figure is ``None`` on any failure.
        """
        if self.env is None or self.agent is None:
            return "❌ Please initialize and train the system first!", None
        try:
            steps = int(steps)
            # With no steps there is no final net worth to report
            if steps < 1:
                return "❌ Simulation steps must be at least 1!", None
            obs, _ = self.env.reset()
            prices = []
            actions = []
            net_worths = []
            portfolio_values = []
            cash_balances = []
            for step in range(steps):
                action = self.agent.select_action(obs, training=False)
                next_obs, reward, done, _, info = self.env.step(action)
                prices.append(self.env.current_price)
                actions.append(action)
                net_worths.append(info['net_worth'])
                portfolio_values.append(info['position_value'])
                cash_balances.append(info['cash_balance'])
                obs = next_obs
                if done:
                    break
            fig = self._create_simulation_plot(prices, actions, net_worths, portfolio_values, cash_balances)
            final_return = (net_worths[-1] - self.config.initial_balance) / self.config.initial_balance * 100
            result_text = f"✅ Simulation completed! Final Return: {final_return:.2f}% | " \
                         f"Final Net Worth: ${net_worths[-1]:.2f}"
            return result_text, fig
        except Exception as e:
            return f"❌ Simulation error: {str(e)}", None

    @staticmethod
    def _moving_average(values, window: int) -> List[float]:
        """Return the trailing mean over at most ``window`` items for each position of ``values``."""
        if window < 1:
            raise ValueError("window must be at least 1")
        return [
            float(np.mean(values[max(0, end - window):end]))
            for end in range(1, len(values) + 1)
        ]

    def _create_training_plot(self):
        """Build the 2x2 training figure, or return ``None`` when no episode has been recorded."""
        if not self.training_history['episode_rewards']:
            return None
        episodes = list(range(1, len(self.training_history['episode_rewards']) + 1))
        fig = make_subplots(rows=2, cols=2,
           subplot_titles=('Episode Rewards', 'Training Loss',
                          'Epsilon Decay', 'Moving Average Reward'),
           vertical_spacing=0.12)
        fig.add_trace(
            go.Scatter(x=episodes, y=self.training_history['episode_rewards'],
                      mode='lines', name='Reward', line=dict(color='blue')),
            row=1, col=1
        )
        fig.add_trace(
            go.Scatter(x=episodes, y=self.training_history['episode_losses'],
                      mode='lines', name='Loss', line=dict(color='red')),
            row=1, col=2
        )
        fig.add_trace(
            go.Scatter(x=episodes, y=self.training_history['epsilon_history'],
                      mode='lines', name='Epsilon', line=dict(color='green')),
            row=2, col=1
        )
        window = min(20, len(episodes))
        moving_avg = self._moving_average(self.training_history['episode_rewards'], window)
        fig.add_trace(
            go.Scatter(x=episodes, y=moving_avg,
                      mode='lines', name='MA Reward', line=dict(color='orange', width=2)),
            row=2, col=2
        )
        fig.update_layout(height=600, showlegend=True, title_text="Training Progress")
        return fig

    def _create_simulation_plot(self, prices, actions, net_worths, portfolio_values, cash_balances):
        """
        Build the 2x2 simulation figure.

        Panels: price with per-action markers, net worth with return % on a
        secondary axis, position value versus cash, and action counts.
        """
        # The performance panel needs a declared secondary y-axis so the
        # percentage trace is not drawn on the dollar scale.
        fig = make_subplots(rows=2, cols=2,
           subplot_titles=('Price & Actions', 'Portfolio Performance',
                         'Portfolio Composition', 'Action Distribution'),
           specs=[[{}, {"secondary_y": True}], [{}, {}]],
           vertical_spacing=0.12,
           horizontal_spacing=0.1)
        steps = list(range(len(prices)))
        fig.add_trace(
            go.Scatter(x=steps, y=prices, mode='lines', name='Price', line=dict(color='blue')),
            row=1, col=1
        )
        action_colors = ['gray', 'green', 'red', 'orange']
        action_names = ['Hold', 'Buy', 'Sell', 'Close']
        for action in range(4):
            action_indices = [i for i, a in enumerate(actions) if a == action]
            if action_indices:
                action_prices = [prices[i] for i in action_indices]
                fig.add_trace(
                    go.Scatter(x=action_indices, y=action_prices,
                              mode='markers', name=action_names[action],
                              marker=dict(color=action_colors[action], size=8)),
                    row=1, col=1
                )
        initial_balance = self.config.initial_balance
        returns = [(nw - initial_balance) / initial_balance * 100 for nw in net_worths]
        fig.add_trace(
            go.Scatter(x=steps, y=net_worths, mode='lines', name='Net Worth', line=dict(color='purple')),
            row=1, col=2, secondary_y=False
        )
        fig.add_trace(
            go.Scatter(x=steps, y=returns, mode='lines', name='Return %', line=dict(color='orange')),
            row=1, col=2, secondary_y=True
        )
        fig.add_trace(
            go.Scatter(x=steps, y=portfolio_values, mode='lines', name='Portfolio Value', line=dict(color='green')),
            row=2, col=1
        )
        fig.add_trace(
            go.Scatter(x=steps, y=cash_balances, mode='lines', name='Cash Balance', line=dict(color='blue')),
            row=2, col=1
        )
        action_counts = [actions.count(i) for i in range(4)]
        fig.add_trace(
            go.Bar(x=action_names, y=action_counts,
                  marker_color=action_colors, name='Action Count'),
            row=2, col=2
        )
        fig.update_layout(height=700, showlegend=True, title_text="Trading Simulation Results")
        fig.update_yaxes(title_text="Return (%)", row=1, col=2, secondary_y=True)
        fig.update_yaxes(title_text="Value ($)", row=1, col=2, secondary_y=False)
        return fig
+# ==== 5. Gradio Interface ====
 def create_interface():
     demo = TradingDemo()
     with gr.Blocks(theme=gr.themes.Soft(), title="AI Trading Demo") as interface:
         gr.Markdown("""
         # 🤖 Advanced AI Trading Demo
         **Deep Reinforcement Learning for Financial Markets**
         This demo shows a DQN agent learning to trade in simulated financial markets.
         The agent learns optimal trading strategies through reinforcement learning.
         """)
                 risk = gr.Radio(["Low", "Medium", "High"], value="Medium", label="Risk Level")
                 asset = gr.Radio(["Crypto", "Stock", "Forex"], value="Crypto", label="Asset Type")
                 init_btn = gr.Button("🚀 Initialize System", variant="primary")
             with gr.Column(scale=2):
                 gr.Markdown("## 📊 System Status")
                 status = gr.Textbox(label="Status", value="Click 'Initialize System' to start", interactive=False)
                 episodes = gr.Number(value=100, label="Training Episodes", precision=0)
                 train_btn = gr.Button("🎯 Start Training", variant="primary")
                 train_plot = gr.Plot(label="Training Progress")
             with gr.Column():
                 gr.Markdown("## 📈 Simulation")
                 steps = gr.Number(value=200, label="Simulation Steps", precision=0)
                 sim_btn = gr.Button("▶️ Run Simulation", variant="primary")
                 sim_plot = gr.Plot(label="Simulation Results")
         init_btn.click(
             demo.initialize,
             inputs=[balance, risk, asset],
             outputs=status
         )
         train_btn.click(
             demo.train,
             inputs=episodes,
             outputs=[status, train_plot]
         )
         sim_btn.click(
             demo.simulate,
             inputs=steps,
         2. **Initialize**: Click 'Initialize System' to set up the trading environment
         3. **Train**: Start training the AI agent (recommended: 100+ episodes)
         4. **Simulate**: Run a trading simulation to see the trained agent in action
         ## 🎮 Actions:
         - **0: Hold** - Maintain current position
         - **1: Buy** - Purchase asset (20% of balance)
         """)
     return interface
 demo = create_interface()