Spaces:

OmidSakaki
/

VisualTradingAI

Sleeping

App Files Files Community

OmidSakaki commited on Oct 16, 2025

Commit

ef8a89d

verified ·

1 Parent(s): 2965337

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -707

app.py CHANGED Viewed

@@ -5,717 +5,19 @@ import torch.nn as nn
 import torch.optim as optim
 from collections import deque
 import random
-from pathlib import Path
 from typing import Dict, Tuple, Any, List, Optional
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
-import yaml
-# Configuration class for trading parameters
-class TradingConfig:
-    """
-    Configuration class for trading environment and agent parameters.
-    Centralizes all configurable parameters for easy modification.
-    """
-    def __init__(self):
-        # Environment parameters
-        self.initial_balance = 10000.0
-        self.max_steps = 1000
-        self.transaction_cost = 0.001
-        self.risk_level = "Medium"
-        self.asset_type = "Crypto"
-        # DQN agent parameters
-        self.learning_rate = 0.0001
-        self.gamma = 0.99  # Discount factor
-        self.epsilon_start = 1.0
-        self.epsilon_min = 0.01
-        self.epsilon_decay = 0.9995
-        self.batch_size = 32
-        self.memory_size = 10000
-        self.target_update = 100
-        self.hidden_size = 128
-        # Risk adjustment factors based on risk level
-        self.risk_multipliers = {
-            "Low": 0.5,
-            "Medium": 1.0,
-            "High": 2.0
-        }
-class AdvancedTradingEnvironment:
-    """
-    Advanced trading environment simulating financial markets with multiple assets.
-    Supports crypto, stocks, and forex with realistic price movements and sentiment analysis.
-    """
-    def __init__(self, config: TradingConfig):
-        self.config = config
-        self.initial_balance = config.initial_balance
-        self.balance = self.initial_balance
-        self.position = 0.0
-        self.current_price = 100.0
-        self.step_count = 0
-        self.max_steps = config.max_steps
-        self.transaction_cost = config.transaction_cost
-        # Market data history
-        self.price_history = []
-        self.volume_history = []
-        self.sentiment_history = []
-        # Risk adjustment
-        self.risk_multiplier = config.risk_multipliers[config.risk_level]
-        # Initialize market data
-        self._initialize_market_data()
-        # Define action and observation spaces
-        self.action_space = 4  # 0: Hold, 1: Buy, 2: Sell, 3: Close Position
-        self.observation_space = (15,)  # Increased features for better state representation
-        # Portfolio tracking
-        self.portfolio_history = []
-        self.action_history = []
-    def _initialize_market_data(self):
-        """Initialize synthetic market data based on asset type"""
-        n_points = 200  # Longer history for better indicators
-        # Different volatility based on asset type
-        volatility_map = {
-            "Crypto": 0.03,
-            "Stock": 0.015,
-            "Forex": 0.008
-        }
-        volatility = volatility_map.get(self.config.asset_type, 0.02)
-        base_price = 100.0
-        for i in range(n_points):
-            # More realistic price generation with momentum
-            momentum = np.sin(i * 0.05) * 2
-            noise = np.random.normal(0, volatility)
-            price = base_price * (1 + momentum * 0.01 + noise)
-            price = max(10.0, price)  # Prevent negative prices
-            self.price_history.append(price)
-            # Volume with some correlation to price movement
-            volume = 1000 + abs(price - base_price) * 50 + np.random.normal(0, 200)
-            self.volume_history.append(max(100, volume))
-            # Sentiment with persistence
-            if i > 0:
-                prev_sentiment = self.sentiment_history[-1]
-                sentiment_change = np.random.normal(0, 0.08)
-                sentiment = prev_sentiment + sentiment_change
-            else:
-                sentiment = 0.5 + np.random.normal(0, 0.1)
-            self.sentiment_history.append(np.clip(sentiment, 0.0, 1.0))
-        self.current_price = self.price_history[-1]
-    def _calculate_technical_indicators(self) -> List[float]:
-        """Calculate technical indicators from price history"""
-        prices = np.array(self.price_history[-50:])  # Use last 50 points
-        if len(prices) < 2:
-            return [0.0] * 6  # Default values
-        returns = np.diff(prices) / prices[:-1]
-        # Simple Moving Averages
-        sma_short = np.mean(prices[-10:]) if len(prices) >= 10 else prices[-1]
-        sma_long = np.mean(prices[-20:]) if len(prices) >= 20 else prices[-1]
-        # RSI (Relative Strength Index)
-        if len(returns) >= 14:
-            gains = returns[returns > 0]
-            losses = -returns[returns < 0]
-            avg_gain = np.mean(gains[-14:]) if len(gains) > 0 else 0.001
-            avg_loss = np.mean(losses[-14:]) if len(losses) > 0 else 0.001
-            rsi = 100 - (100 / (1 + avg_gain / avg_loss))
-        else:
-            rsi = 50.0
-        # Volatility (annualized)
-        volatility = np.std(returns) * np.sqrt(252) if len(returns) > 1 else 0.1
-        # Price momentum
-        momentum = (prices[-1] / prices[-5] - 1) if len(prices) >= 5 else 0.0
-        # Volume trend
-        volumes = np.array(self.volume_history[-10:])
-        volume_trend = np.mean(volumes[-5:]) / np.mean(volumes[-10:]) - 1 if len(volumes) >= 10 else 0.0
-        return [sma_short/100, sma_long/100, rsi/100, volatility, momentum, volume_trend]
-    def reset(self) -> Tuple[np.ndarray, Dict]:
-        """Reset environment to initial state"""
-        self.balance = self.initial_balance
-        self.position = 0.0
-        self.step_count = 0
-        self.portfolio_history = []
-        self.action_history = []
-        # Reinitialize market data
-        self.price_history = [100.0 + np.random.normal(0, 5)]
-        self.volume_history = [1000 + np.random.normal(0, 200)]
-        self.sentiment_history = [0.5 + np.random.normal(0, 0.1)]
-        self.current_price = self.price_history[-1]
-        obs = self._get_observation()
-        info = self._get_info()
-        return obs, info
-    def step(self, action: int) -> Tuple[np.ndarray, float, bool, bool, Dict]:
-        """Execute one trading step"""
-        self.step_count += 1
-        # Generate new market data with more realistic dynamics
-        self._update_market_data()
-        # Execute trading action
-        reward = self._execute_action(action)
-        # Check termination conditions
-        terminated = self.balance <= 0 or self.step_count >= self.max_steps
-        truncated = False
-        # Get new observation and info
-        obs = self._get_observation()
-        info = self._get_info()
-        # Track portfolio value
-        self.portfolio_history.append(info['net_worth'])
-        self.action_history.append(action)
-        return obs, reward, terminated, truncated, info
-    def _update_market_data(self):
-        """Update market data with realistic price movements"""
-        # Price change with momentum and volatility clustering
-        prev_returns = np.diff(self.price_history[-5:]) / self.price_history[-5:-1] if len(self.price_history) >= 6 else [0]
-        momentum = np.mean(prev_returns) if prev_returns else 0
-        volatility_map = {
-            "Crypto": 0.025,
-            "Stock": 0.012,
-            "Forex": 0.006
-        }
-        base_volatility = volatility_map.get(self.config.asset_type, 0.015)
-        # Volatility scaling based on risk level
-        volatility = base_volatility * self.risk_multiplier
-        price_change = momentum * 0.3 + np.random.normal(0, volatility)
-        self.current_price = max(10.0, self.current_price * (1 + price_change))
-        self.price_history.append(self.current_price)
-        # Update volume with some noise
-        base_volume = 1000
-        volume_noise = np.random.normal(0, 200)
-        new_volume = max(100, base_volume + abs(price_change) * 5000 + volume_noise)
-        self.volume_history.append(new_volume)
-        # Update sentiment with mean reversion
-        current_sentiment = self.sentiment_history[-1]
-        sentiment_reversion = (0.5 - current_sentiment) * 0.1  # Mean reversion
-        sentiment_noise = np.random.normal(0, 0.08)
-        new_sentiment = current_sentiment + sentiment_reversion + sentiment_noise
-        self.sentiment_history.append(np.clip(new_sentiment, 0.0, 1.0))
-    def _execute_action(self, action: int) -> float:
-        """Execute trading action and calculate reward"""
-        prev_net_worth = self.balance + self.position * self.current_price
-        trade_size_multiplier = 0.2 * self.risk_multiplier  # Risk-adjusted position sizing
-        if action == 1:  # Buy
-            if self.balance > 0:
-                trade_amount = min(self.balance * trade_size_multiplier, self.balance)
-                cost = trade_amount * (1 + self.transaction_cost)
-                if cost <= self.balance:
-                    shares_bought = trade_amount / self.current_price
-                    self.position += shares_bought
-                    self.balance -= cost
-        elif action == 2:  # Sell
-            if self.position > 0:
-                sell_fraction = trade_size_multiplier
-                shares_to_sell = min(self.position * sell_fraction, self.position)
-                proceeds = shares_to_sell * self.current_price * (1 - self.transaction_cost)
-                self.position -= shares_to_sell
-                self.balance += proceeds
-        elif action == 3:  # Close position
-            if self.position > 0:
-                proceeds = self.position * self.current_price * (1 - self.transaction_cost)
-                self.balance += proceeds
-                self.position = 0
-        # Calculate new net worth and reward
-        new_net_worth = self.balance + self.position * self.current_price
-        raw_reward = (new_net_worth - prev_net_worth) / self.initial_balance * 100
-        # Risk-adjusted reward with penalty for large drawdowns
-        risk_penalty = 0.0
-        if new_net_worth < self.initial_balance * 0.8:  # 20% drawdown
-            risk_penalty = (self.initial_balance - new_net_worth) / self.initial_balance * 10
-        final_reward = raw_reward - risk_penalty
-        return final_reward
-    def _get_observation(self) -> np.ndarray:
-        """Get current environment observation"""
-        # Price-based features
-        recent_prices = self.price_history[-20:] if len(self.price_history) >= 20 else [self.current_price] * 20
-        price_features = [
-            self.current_price / 100.0,
-            np.mean(recent_prices) / 100.0,
-            np.std(recent_prices) / 100.0,
-            (self.current_price - np.min(recent_prices)) / (np.max(recent_prices) - np.min(recent_prices)) if len(recent_prices) > 1 else 0.5
-        ]
-        # Portfolio features
-        portfolio_features = [
-            self.balance / self.initial_balance,
-            self.position * self.current_price / self.initial_balance,
-            self.step_count / self.max_steps
-        ]
-        # Sentiment features
-        recent_sentiments = self.sentiment_history[-10:] if len(self.sentiment_history) >= 10 else [0.5] * 10
-        sentiment_features = [
-            np.mean(recent_sentiments),
-            np.std(recent_sentiments),
-            recent_sentiments[-1]  # Latest sentiment
-        ]
-        # Technical indicators
-        technical_features = self._calculate_technical_indicators()
-        # Combine all features
-        all_features = price_features + portfolio_features + sentiment_features + technical_features
-        # Ensure fixed size and convert to numpy array
-        observation = np.array(all_features[:15], dtype=np.float32)
-        return observation
-    def _get_info(self) -> Dict[str, Any]:
-        """Get environment information for logging"""
-        net_worth = self.balance + self.position * self.current_price
-        return_total = (net_worth - self.initial_balance) / self.initial_balance * 100
-        return {
-            'net_worth': net_worth,
-            'return_percent': return_total,
-            'position_value': self.position * self.current_price,
-            'cash_balance': self.balance,
-            'current_price': self.current_price,
-            'steps': self.step_count
-        }
-class DQNAgent:
-    """
-    Deep Q-Network agent for trading decisions.
-    Implements experience replay and target network for stable learning.
-    """
-    def __init__(self, state_dim: int, action_dim: int, config: TradingConfig, device: str = 'cpu'):
-        self.device = torch.device(device)
-        self.state_dim = state_dim
-        self.action_dim = action_dim
-        self.config = config
-        # Q-network and target network
-        self.q_network = self._build_network(state_dim, action_dim)
-        self.target_network = self._build_network(state_dim, action_dim)
-        self.target_network.load_state_dict(self.q_network.state_dict())
-        # Optimization
-        self.optimizer = optim.Adam(self.q_network.parameters(), lr=config.learning_rate)
-        self.criterion = nn.MSELoss()
-        # Experience replay
-        self.memory = deque(maxlen=config.memory_size)
-        # Exploration parameters
-        self.epsilon = config.epsilon_start
-        self.epsilon_min = config.epsilon_min
-        self.epsilon_decay = config.epsilon_decay
-        # Training parameters
-        self.batch_size = config.batch_size
-        self.gamma = config.gamma
-        self.target_update = config.target_update
-        self.steps = 0
-    def _build_network(self, state_dim: int, action_dim: int) -> nn.Sequential:
-        """Build the neural network for Q-value approximation"""
-        return nn.Sequential(
-            nn.Linear(state_dim, self.config.hidden_size),
-            nn.ReLU(),
-            nn.Linear(self.config.hidden_size, self.config.hidden_size),
-            nn.ReLU(),
-            nn.Linear(self.config.hidden_size, self.config.hidden_size // 2),
-            nn.ReLU(),
-            nn.Linear(self.config.hidden_size // 2, action_dim)
-        ).to(self.device)
-    def select_action(self, state: np.ndarray, training: bool = True) -> int:
-        """Select action using epsilon-greedy policy"""
-        if training and random.random() < self.epsilon:
-            return random.randint(0, self.action_dim - 1)
-        state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
-        with torch.no_grad():
-            q_values = self.q_network(state_tensor)
-            return q_values.argmax(1).item()
-    def store_transition(self, state: np.ndarray, action: int, reward: float,
-                        next_state: np.ndarray, done: bool):
-        """Store experience in replay memory"""
-        self.memory.append((state, action, reward, next_state, done))
-    def update(self) -> float:
-        """Update Q-network using experience replay"""
-        if len(self.memory) < self.batch_size:
-            return 0.0
-        # Sample batch from memory
-        batch = random.sample(self.memory, self.batch_size)
-        states, actions, rewards, next_states, dones = zip(*batch)
-        # Convert to tensors
-        states = torch.FloatTensor(np.array(states)).to(self.device)
-        actions = torch.LongTensor(actions).to(self.device)
-        rewards = torch.FloatTensor(rewards).to(self.device)
-        next_states = torch.FloatTensor(np.array(next_states)).to(self.device)
-        dones = torch.BoolTensor(dones).to(self.device)  # Fixed: Use BoolTensor instead of FloatTensor
-        # Current Q values
-        current_q_values = self.q_network(states).gather(1, actions.unsqueeze(1)).squeeze(1)
-        # Next Q values from target network
-        with torch.no_grad():
-            next_q_values = self.target_network(next_states).max(1)[0]
-            # Fixed: Use proper boolean masking
-            target_q_values = rewards + self.gamma * next_q_values * (~dones).float()
-        # Compute loss and update
-        loss = self.criterion(current_q_values, target_q_values)
-        self.optimizer.zero_grad()
-        loss.backward()
-        # Gradient clipping for stability
-        torch.nn.utils.clip_grad_norm_(self.q_network.parameters(), 1.0)
-        self.optimizer.step()
-        # Update target network periodically
-        self.steps += 1
-        if self.steps % self.target_update == 0:
-            self.target_network.load_state_dict(self.q_network.state_dict())
-        # Decay epsilon
-        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
-        return loss.item()
-    def save(self, path: str):
-        """Save agent parameters"""
-        torch.save({
-            'q_network_state_dict': self.q_network.state_dict(),
-            'target_network_state_dict': self.target_network.state_dict(),
-            'optimizer_state_dict': self.optimizer.state_dict(),
-            'epsilon': self.epsilon,
-            'steps': self.steps
-        }, path)
-    def load(self, path: str):
-        """Load agent parameters"""
-        checkpoint = torch.load(path, map_location=self.device)
-        self.q_network.load_state_dict(checkpoint['q_network_state_dict'])
-        self.target_network.load_state_dict(checkpoint['target_network_state_dict'])
-        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
-        self.epsilon = checkpoint['epsilon']
-        self.steps = checkpoint['steps']
-class TradingDemo:
-    """
-    Main demonstration class integrating trading environment and DQN agent.
-    Provides interface for training, simulation, and visualization.
-    """
-    def __init__(self):
-        self.config = TradingConfig()
-        self.env = None
-        self.agent = None
-        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
-        print(f"Using device: {self.device}")
-        # Training history
-        self.training_history = {
-            'episode_rewards': [],
-            'episode_losses': [],
-            'epsilon_history': []
-        }
-    def initialize(self, balance: float, risk: str, asset: str) -> str:
-        """Initialize trading environment and agent"""
-        try:
-            self.config.initial_balance = float(balance)
-            self.config.risk_level = risk
-            self.config.asset_type = asset
-            # Create environment and agent
-            self.env = AdvancedTradingEnvironment(self.config)
-            self.agent = DQNAgent(15, 4, self.config, self.device)
-            # Reset training history
-            self.training_history = {
-                'episode_rewards': [],
-                'episode_losses': [],
-                'epsilon_history': []
-            }
-            return f"✅ System initialized! Balance: ${balance}, Risk: {risk}, Asset: {asset}"
-        except Exception as e:
-            return f"❌ Initialization failed: {str(e)}"
-    def train(self, episodes: int):
-        """Train the DQN agent"""
-        if self.env is None or self.agent is None:
-            yield "❌ Please initialize the system first!", None
-            return
-        try:
-            episodes = int(episodes)
-            for episode in range(episodes):
-                # Reset environment
-                obs, _ = self.env.reset()
-                total_reward = 0
-                episode_loss = 0
-                update_count = 0
-                done = False
-                while not done:
-                    # Select and execute action
-                    action = self.agent.select_action(obs)
-                    next_obs, reward, done, _, info = self.env.step(action)
-                    # Store experience and update
-                    self.agent.store_transition(obs, action, reward, next_obs, done)
-                    loss = self.agent.update()
-                    if loss > 0:
-                        episode_loss += loss
-                        update_count += 1
-                    total_reward += reward
-                    obs = next_obs
-                # Calculate average loss
-                avg_loss = episode_loss / max(update_count, 1)
-                # Update history
-                self.training_history['episode_rewards'].append(total_reward)
-                self.training_history['episode_losses'].append(avg_loss)
-                self.training_history['epsilon_history'].append(self.agent.epsilon)
-                # Yield progress
-                progress = f"Episode {episode+1}/{episodes} | " \
-                         f"Reward: {total_reward:.2f} | " \
-                         f"Loss: {avg_loss:.4f} | " \
-                         f"Epsilon: {self.agent.epsilon:.3f} | " \
-                         f"Net Worth: ${info['net_worth']:.2f}"
-                # Create training plot every 10 episodes or at the end
-                if (episode + 1) % 10 == 0 or episode == episodes - 1:
-                    plot = self._create_training_plot()
-                    yield progress, plot
-                else:
-                    yield progress, None
-            yield "✅ Training completed successfully!", self._create_training_plot()
-        except Exception as e:
-            yield f"❌ Training error: {str(e)}", None
-    def simulate(self, steps: int):
-        """Run trading simulation with current policy"""
-        if self.env is None or self.agent is None:
-            return "❌ Please initialize and train the system first!", None
-        try:
-            steps = int(steps)
-            obs, _ = self.env.reset()
-            # Tracking data
-            prices = []
-            actions = []
-            net_worths = []
-            portfolio_values = []
-            cash_balances = []
-            for step in range(steps):
-                action = self.agent.select_action(obs, training=False)
-                next_obs, reward, done, _, info = self.env.step(action)
-                # Track metrics
-                prices.append(self.env.current_price)
-                actions.append(action)
-                net_worths.append(info['net_worth'])
-                portfolio_values.append(info['position_value'])
-                cash_balances.append(info['cash_balance'])
-                obs = next_obs
-                if done:
-                    break
-            # Create comprehensive visualization
-            fig = self._create_simulation_plot(prices, actions, net_worths, portfolio_values, cash_balances)
-            final_return = (net_worths[-1] - self.config.initial_balance) / self.config.initial_balance * 100
-            result_text = f"✅ Simulation completed! Final Return: {final_return:.2f}% | " \
-                         f"Final Net Worth: ${net_worths[-1]:.2f}"
-            return result_text, fig
-        except Exception as e:
-            return f"❌ Simulation error: {str(e)}", None
-    def _create_training_plot(self):
-        """Create training progress visualization"""
-        if not self.training_history['episode_rewards']:
-            return None
-        episodes = list(range(1, len(self.training_history['episode_rewards']) + 1))
-        fig = make_subplots(rows=2, cols=2,
-                           subplot_titles=('Episode Rewards', 'Training Loss',
-                                         'Epsilon Decay', 'Moving Average Reward'),
-                           vertical_spacing=0.12)
-        # Rewards
-        fig.add_trace(
-            go.Scatter(x=episodes, y=self.training_history['episode_rewards'],
-                      mode='lines', name='Reward', line=dict(color='blue')),
-            row=1, col=1
-        )
-        # Loss
-        fig.add_trace(
-            go.Scatter(x=episodes, y=self.training_history['episode_losses'],
-                      mode='lines', name='Loss', line=dict(color='red')),
-            row=1, col=2
-        )
-        # Epsilon
-        fig.add_trace(
-            go.Scatter(x=episodes, y=self.training_history['epsilon_history'],
-                      mode='lines', name='Epsilon', line=dict(color='green')),
-            row=2, col=1
-        )
-        # Moving average reward
-        window = min(20, len(episodes))
-        moving_avg = [np.mean(self.training_history['episode_rewards'][max(0, i-window):i+1])
-                     for i in range(len(episodes))]
-        fig.add_trace(
-            go.Scatter(x=episodes, y=moving_avg,
-                      mode='lines', name='MA Reward', line=dict(color='orange', width=2)),
-            row=2, col=2
-        )
-        fig.update_layout(height=600, showlegend=True, title_text="Training Progress")
-        return fig
-    def _create_simulation_plot(self, prices, actions, net_worths, portfolio_values, cash_balances):
-        """Create comprehensive simulation results visualization"""
-        fig = make_subplots(rows=2, cols=2,
-                           subplot_titles=('Price & Actions', 'Portfolio Performance',
-                                         'Portfolio Composition', 'Action Distribution'),
-                           vertical_spacing=0.12,
-                           horizontal_spacing=0.1)
-        steps = list(range(len(prices)))
-        # Price and actions
-        fig.add_trace(
-            go.Scatter(x=steps, y=prices, mode='lines', name='Price', line=dict(color='blue')),
-            row=1, col=1
-        )
-        # Add action markers
-        action_colors = ['gray', 'green', 'red', 'orange']  # Hold, Buy, Sell, Close
-        action_names = ['Hold', 'Buy', 'Sell', 'Close']
-        for action in range(4):
-            action_indices = [i for i, a in enumerate(actions) if a == action]
-            if action_indices:
-                action_prices = [prices[i] for i in action_indices]
-                fig.add_trace(
-                    go.Scatter(x=action_indices, y=action_prices,
-                              mode='markers', name=action_names[action],
-                              marker=dict(color=action_colors[action], size=8)),
-                    row=1, col=1
-                )
-        # Portfolio performance
-        initial_balance = self.config.initial_balance
-        returns = [(nw - initial_balance) / initial_balance * 100 for nw in net_worths]
-        fig.add_trace(
-            go.Scatter(x=steps, y=net_worths, mode='lines', name='Net Worth', line=dict(color='purple')),
-            row=1, col=2
-        )
-        fig.add_trace(
-            go.Scatter(x=steps, y=returns, mode='lines', name='Return %', line=dict(color='orange'), yaxis='y2'),
-            row=1, col=2
-        )
-        # Portfolio composition
-        fig.add_trace(
-            go.Scatter(x=steps, y=portfolio_values, mode='lines', name='Portfolio Value', line=dict(color='green')),
-            row=2, col=1
-        )
-        fig.add_trace(
-            go.Scatter(x=steps, y=cash_balances, mode='lines', name='Cash Balance', line=dict(color='blue')),
-            row=2, col=1
-        )
-        # Action distribution
-        action_counts = [actions.count(i) for i in range(4)]
-        fig.add_trace(
-            go.Bar(x=action_names, y=action_counts,
-                  marker_color=action_colors, name='Action Count'),
-            row=2, col=2
-        )
-        # Update layout
-        fig.update_layout(height=700, showlegend=True, title_text="Trading Simulation Results")
-        fig.update_yaxes(title_text="Return (%)", row=1, col=2, secondary_y=True)
-        fig.update_yaxes(title_text="Value ($)", row=1, col=2, secondary_y=False)
-        return fig
-# Create and launch Gradio interface
 def create_interface():
-    """Create Gradio interface for the trading demo"""
     demo = TradingDemo()
     with gr.Blocks(theme=gr.themes.Soft(), title="AI Trading Demo") as interface:
         gr.Markdown("""
         # 🤖 Advanced AI Trading Demo
         **Deep Reinforcement Learning for Financial Markets**
         This demo shows a DQN agent learning to trade in simulated financial markets.
         The agent learns optimal trading strategies through reinforcement learning.
         """)
@@ -723,7 +25,6 @@ def create_interface():
         with gr.Row():
             with gr.Column(scale=1):
                 gr.Markdown("## 🎯 Configuration")
                 balance = gr.Slider(1000, 50000, 10000, step=1000, label="Initial Balance ($)")
                 risk = gr.Radio(["Low", "Medium", "High"], value="Medium", label="Risk Level")
                 asset = gr.Radio(["Crypto", "Stock", "Forex"], value="Crypto", label="Asset Type")
@@ -771,17 +72,13 @@ def create_interface():
         2. **Initialize**: Click 'Initialize System' to set up the trading environment
         3. **Train**: Start training the AI agent (recommended: 100+ episodes)
         4. **Simulate**: Run a trading simulation to see the trained agent in action
         ## 🎮 Actions:
         - **0: Hold** - Maintain current position
         - **1: Buy** - Purchase asset (20% of balance)
         - **2: Sell** - Sell portion of position (20%)
         - **3: Close** - Liquidate entire position
         """)
     return interface
-# Launch the application
-if __name__ == "__main__":
-    interface = create_interface()
-    interface.launch(share=True, server_name="0.0.0.0", server_port=7860)

 import torch.optim as optim
 from collections import deque
 import random
 from typing import Dict, Tuple, Any, List, Optional
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
+# (تمام کلاس‌های TradingConfig, AdvancedTradingEnvironment, DQNAgent, TradingDemo عیناً حفظ شده‌اند...)
+# این تابع همانند قبل است
 def create_interface():
     demo = TradingDemo()
     with gr.Blocks(theme=gr.themes.Soft(), title="AI Trading Demo") as interface:
         gr.Markdown("""
         # 🤖 Advanced AI Trading Demo
         **Deep Reinforcement Learning for Financial Markets**
         This demo shows a DQN agent learning to trade in simulated financial markets.
         The agent learns optimal trading strategies through reinforcement learning.
         """)
         with gr.Row():
             with gr.Column(scale=1):
                 gr.Markdown("## 🎯 Configuration")
                 balance = gr.Slider(1000, 50000, 10000, step=1000, label="Initial Balance ($)")
                 risk = gr.Radio(["Low", "Medium", "High"], value="Medium", label="Risk Level")
                 asset = gr.Radio(["Crypto", "Stock", "Forex"], value="Crypto", label="Asset Type")
         2. **Initialize**: Click 'Initialize System' to set up the trading environment
         3. **Train**: Start training the AI agent (recommended: 100+ episodes)
         4. **Simulate**: Run a trading simulation to see the trained agent in action
         ## 🎮 Actions:
         - **0: Hold** - Maintain current position
         - **1: Buy** - Purchase asset (20% of balance)
         - **2: Sell** - Sell portion of position (20%)
         - **3: Close** - Liquidate entire position
         """)
     return interface
+# نکته مهم: فقط این خط باید اجرا شود و نام متغیر باید demo باشد
+demo = create_interface()