Spaces:

OmidSakaki
/

VisualTradingAI

Sleeping

App Files Files Community

OmidSakaki committed on Oct 15, 2025

Commit

8f54cbf

verified ·

1 Parent(s): 769c366

Update src/environments/visual_trading_env.py

Browse files

Files changed (1) hide show

src/environments/visual_trading_env.py +102 -85

src/environments/visual_trading_env.py CHANGED Viewed

@@ -1,18 +1,17 @@
 import numpy as np
-import pandas as pd
 import matplotlib.pyplot as plt
 from PIL import Image
 import io
 class VisualTradingEnvironment:
     def __init__(self, initial_balance=10000, risk_level="Medium", asset_type="Stock"):
-        self.initial_balance = initial_balance
         self.risk_level = risk_level
         self.asset_type = asset_type
         # Risk multipliers
         risk_multipliers = {"Low": 0.5, "Medium": 1.0, "High": 2.0}
-        self.risk_multiplier = risk_multipliers[risk_level]
         # Generate market data
         self._generate_market_data()
@@ -25,21 +24,23 @@ class VisualTradingEnvironment:
         np.random.seed(42)
         # Base parameters based on asset type
-        if self.asset_type == "Crypto":
-            volatility = 0.02 * self.risk_multiplier
-            trend = 0.001
-        elif self.asset_type == "Forex":
-            volatility = 0.005 * self.risk_multiplier
-            trend = 0.0002
-        else:  # Stock
-            volatility = 0.01 * self.risk_multiplier
-            trend = 0.0005
         prices = [100.0]
         for i in range(1, num_points):
-            # Random walk with trend and volatility
             change = np.random.normal(trend, volatility)
-            price = max(1.0, prices[-1] * (1 + change))
             prices.append(price)
         self.price_data = np.array(prices)
@@ -61,7 +62,7 @@ class VisualTradingEnvironment:
             fig, ax = plt.subplots(figsize=(4.2, 4.2), dpi=20, facecolor='black')
             ax.set_facecolor('black')
-            # Plot price
             if len(prices) > 0:
                 ax.plot(prices, color='cyan', linewidth=1.5)
@@ -97,7 +98,7 @@ class VisualTradingEnvironment:
             plt.close(fig)
             # Create attention map with same dimensions
-            attention_map = np.zeros((84, 84))
             if len(prices) > 1:
                 recent_change = (prices[-1] - prices[-2]) / prices[-2]
                 intensity = min(255, abs(recent_change) * 5000)
@@ -110,12 +111,10 @@ class VisualTradingEnvironment:
                     for j in range(max(0, center_y-size), min(84, center_y+size)):
                         distance = np.sqrt((i-center_x)**2 + (j-center_y)**2)
                         if distance <= size:
-                            attention_map[i, j] = max(attention_map[i, j], intensity * (1 - distance/size))
-            # Ensure both arrays have same shape before concatenation
-            attention_map = attention_map.astype(np.uint8)
-            # Combine RGB with attention map - now both are 84x84
             visual_obs = np.concatenate([
                 img_array,
                 attention_map[:, :, np.newaxis]  # Add channel dimension
@@ -132,8 +131,8 @@ class VisualTradingEnvironment:
         """Reset environment to initial state"""
         self.current_step = 50  # Start with some history
         self.balance = self.initial_balance
-        self.position_size = 0
-        self.entry_price = 0
         self.net_worth = self.initial_balance
         self.total_trades = 0
         self.done = False
@@ -142,70 +141,88 @@ class VisualTradingEnvironment:
     def step(self, action):
         """Execute one trading step"""
-        current_price = self.price_data[self.current_step]
-        prev_net_worth = self.net_worth
-        reward = 0
-        # Execute action
-        if action == 1 and self.position_size == 0:  # Buy
-            # Risk-adjusted position sizing
-            position_value = self.balance * 0.1 * self.risk_multiplier
-            self.position_size = position_value / current_price
-            self.entry_price = current_price
-            self.balance -= position_value
-            self.total_trades += 1
-            reward = -0.01  # Small penalty for transaction
-        elif action == 2 and self.position_size > 0:  # Sell (increase position)
-            additional_value = self.balance * 0.05 * self.risk_multiplier
-            additional_size = additional_value / current_price
-            self.position_size += additional_size
-            self.balance -= additional_value
-            self.total_trades += 1
-            reward = -0.005
-        elif action == 3 and self.position_size > 0:  # Close position
-            close_value = self.position_size * current_price
-            self.balance += close_value
-            profit_loss = (current_price - self.entry_price) / self.entry_price
-            reward = profit_loss * 10  # Scale profit/loss
-            self.position_size = 0
-            self.entry_price = 0
-            self.total_trades += 1
-        # Update net worth
-        position_value = self.position_size * current_price if self.position_size > 0 else 0
-        self.net_worth = self.balance + position_value
-        # Add small reward for portfolio growth
-        portfolio_change = (self.net_worth - prev_net_worth) / prev_net_worth
-        reward += portfolio_change * 5
-        # Move to next step
-        self.current_step += 1
-        # Check if episode is done
-        if self.current_step >= len(self.price_data) - 1:
-            self.done = True
-            # Final reward based on overall performance
-            final_return = (self.net_worth - self.initial_balance) / self.initial_balance
-            reward += final_return * 20
-        info = {
-            'net_worth': self.net_worth,
-            'balance': self.balance,
-            'position_size': self.position_size,
-            'current_price': current_price,
-            'total_trades': self.total_trades,
-            'step': self.current_step
-        }
-        obs = self._get_visual_observation()
-        return obs, reward, self.done, info
     def get_price_history(self):
         """Get recent price history for visualization"""
         window_size = min(50, self.current_step)
         start_idx = max(0, self.current_step - window_size)
-        return self.price_data[start_idx:self.current_step].tolist()

 import numpy as np
 import matplotlib.pyplot as plt
 from PIL import Image
 import io
 class VisualTradingEnvironment:
     def __init__(self, initial_balance=10000, risk_level="Medium", asset_type="Stock"):
+        self.initial_balance = float(initial_balance)
         self.risk_level = risk_level
         self.asset_type = asset_type
         # Risk multipliers
         risk_multipliers = {"Low": 0.5, "Medium": 1.0, "High": 2.0}
+        self.risk_multiplier = risk_multipliers.get(risk_level, 1.0)
         # Generate market data
         self._generate_market_data()
         np.random.seed(42)
         # Base parameters based on asset type
+        base_params = {
+            "Stock": {"volatility": 0.01, "trend": 0.0005},
+            "Crypto": {"volatility": 0.02, "trend": 0.001},
+            "Forex": {"volatility": 0.005, "trend": 0.0002}
+        }
+        params = base_params.get(self.asset_type, base_params["Stock"])
+        volatility = params["volatility"] * self.risk_multiplier
+        trend = params["trend"]
         prices = [100.0]
         for i in range(1, num_points):
+            # Random walk with trend and some mean reversion
             change = np.random.normal(trend, volatility)
+            # Add some mean reversion
+            mean_reversion = (100 - prices[-1]) * 0.001
+            price = max(1.0, prices[-1] * (1 + change) + mean_reversion)
             prices.append(price)
         self.price_data = np.array(prices)
             fig, ax = plt.subplots(figsize=(4.2, 4.2), dpi=20, facecolor='black')
             ax.set_facecolor('black')
+            # Plot price if we have data
             if len(prices) > 0:
                 ax.plot(prices, color='cyan', linewidth=1.5)
             plt.close(fig)
             # Create attention map with same dimensions
+            attention_map = np.zeros((84, 84), dtype=np.uint8)
             if len(prices) > 1:
                 recent_change = (prices[-1] - prices[-2]) / prices[-2]
                 intensity = min(255, abs(recent_change) * 5000)
                     for j in range(max(0, center_y-size), min(84, center_y+size)):
                         distance = np.sqrt((i-center_x)**2 + (j-center_y)**2)
                         if distance <= size:
+                            attention_value = intensity * (1 - distance/size)
+                            attention_map[i, j] = max(attention_map[i, j], int(attention_value))
+            # Combine RGB with attention map
             visual_obs = np.concatenate([
                 img_array,
                 attention_map[:, :, np.newaxis]  # Add channel dimension
         """Reset environment to initial state"""
         self.current_step = 50  # Start with some history
         self.balance = self.initial_balance
+        self.position_size = 0.0
+        self.entry_price = 0.0
         self.net_worth = self.initial_balance
         self.total_trades = 0
         self.done = False
     def step(self, action):
         """Execute one trading step"""
+        try:
+            current_price = self.price_data[self.current_step]
+            prev_net_worth = self.net_worth
+            reward = 0.0
+            # Execute action
+            if action == 1 and self.position_size == 0:  # Buy
+                # Risk-adjusted position sizing
+                position_value = self.balance * 0.1 * self.risk_multiplier
+                self.position_size = position_value / current_price
+                self.entry_price = current_price
+                self.balance -= position_value
+                self.total_trades += 1
+                reward = -0.01  # Small penalty for transaction
+            elif action == 2 and self.position_size > 0:  # Sell (increase position)
+                additional_value = self.balance * 0.05 * self.risk_multiplier
+                additional_size = additional_value / current_price
+                self.position_size += additional_size
+                self.balance -= additional_value
+                self.total_trades += 1
+                reward = -0.005
+            elif action == 3 and self.position_size > 0:  # Close position
+                close_value = self.position_size * current_price
+                self.balance += close_value
+                if self.entry_price > 0:
+                    profit_loss = (current_price - self.entry_price) / self.entry_price
+                    reward = profit_loss * 10  # Scale profit/loss
+                self.position_size = 0.0
+                self.entry_price = 0.0
+                self.total_trades += 1
+            # Update net worth
+            position_value = self.position_size * current_price if self.position_size > 0 else 0.0
+            self.net_worth = self.balance + position_value
+            # Add small reward for portfolio growth
+            if prev_net_worth > 0:
+                portfolio_change = (self.net_worth - prev_net_worth) / prev_net_worth
+                reward += portfolio_change * 5
+            # Move to next step
+            self.current_step += 1
+            # Check if episode is done
+            if self.current_step >= len(self.price_data) - 1:
+                self.done = True
+                # Final reward based on overall performance
+                if self.initial_balance > 0:
+                    final_return = (self.net_worth - self.initial_balance) / self.initial_balance
+                    reward += final_return * 20
+            info = {
+                'net_worth': float(self.net_worth),
+                'balance': float(self.balance),
+                'position_size': float(self.position_size),
+                'current_price': float(current_price),
+                'total_trades': int(self.total_trades),
+                'step': int(self.current_step)
+            }
+            obs = self._get_visual_observation()
+            return obs, float(reward), bool(self.done), info
+        except Exception as e:
+            print(f"Error in step execution: {e}")
+            # Return safe default values in case of error
+            default_info = {
+                'net_worth': float(self.initial_balance),
+                'balance': float(self.initial_balance),
+                'position_size': 0.0,
+                'current_price': 100.0,
+                'total_trades': 0,
+                'step': int(self.current_step)
+            }
+            return self._get_visual_observation(), 0.0, True, default_info
     def get_price_history(self):
         """Get recent price history for visualization"""
         window_size = min(50, self.current_step)
         start_idx = max(0, self.current_step - window_size)
+        prices = self.price_data[start_idx:self.current_step]
+        return [float(price) for price in prices]