Update src/agents/advanced_agent.py
src/agents/advanced_agent.py (CHANGED, +165 -113)
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from collections import deque
import random
import warnings
warnings.filterwarnings('ignore')

class EnhancedTradingNetwork(nn.Module):
    def __init__(self, state_dim, action_dim, sentiment_dim=2):
        super(EnhancedTradingNetwork, self).__init__()

        # Visual processing branch
        self.visual_conv = nn.Sequential(
            nn.Conv2d(4, 16, kernel_size=4, stride=2),
            # ... (unchanged layers not shown in the diff; the final conv stage outputs 32 channels) ...
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((8, 8))
        )

        # Calculate the output size after conv layers
        self.conv_output_size = 32 * 8 * 8

        self.visual_fc = nn.Sequential(
            nn.Linear(self.conv_output_size, 256),
            nn.ReLU(),
            nn.Dropout(0.3)
        )

        # Sentiment processing branch
        self.sentiment_fc = nn.Sequential(
            nn.Linear(sentiment_dim, 64),
            # ... (unchanged layers not shown in the diff) ...
            nn.Linear(64, 32),
            nn.ReLU()
        )

        # Combined decision making
        self.combined_fc = nn.Sequential(
            nn.Linear(256 + 32, 128),
            # ... (unchanged layers not shown in the diff) ...
            nn.ReLU(),
            nn.Linear(64, action_dim)
        )

        # Store action_dim for error handling
        self.action_dim = action_dim

    def forward(self, x, sentiment=None):
        try:
            # Ensure input has batch dimension
            if len(x.shape) == 3:    # (H, W, C)
                x = x.unsqueeze(0)
            elif len(x.shape) == 4:  # (batch, H, W, C)
                pass
            else:
                raise ValueError(f"Invalid input shape: {x.shape}")

            # Permute to (batch, C, H, W)
            x = x.permute(0, 3, 1, 2).contiguous().float()

            # Check if channels match expected input
            if x.size(1) != 4:
                raise ValueError(f"Expected 4 channels, got {x.size(1)}")

            visual_features = self.visual_conv(x)
            batch_size = visual_features.size(0)
            visual_features = visual_features.reshape(batch_size, -1)
            visual_features = self.visual_fc(visual_features)

            # Sentiment processing
            if sentiment is not None and self.sentiment_fc is not None:
                if len(sentiment.shape) == 1:
                    sentiment = sentiment.unsqueeze(0)
                sentiment = sentiment.float()
                sentiment_features = self.sentiment_fc(sentiment)
                combined_features = torch.cat([visual_features, sentiment_features], dim=1)
            else:
                # Pad with zeros if no sentiment
                sentiment_features = torch.zeros(batch_size, 32, device=visual_features.device)
                combined_features = torch.cat([visual_features, sentiment_features], dim=1)

            q_values = self.combined_fc(combined_features)
            return q_values

        except Exception as e:
            print(f"Error in network forward: {e}")
            print(f"Input shape: {getattr(x, 'shape', 'Unknown')}")
            # Return safe default with correct shape
            batch_size = x.size(0) if hasattr(x, 'size') else 1
            return torch.zeros(batch_size, self.action_dim, device=(x.device if hasattr(x, 'device') else 'cpu'))
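
# Usage sketch (illustrative, not from this commit): assuming an 84x84x4
# frame-stack observation, a quick smoke test of the forward pass:
#
#     net = EnhancedTradingNetwork(state_dim=(84, 84, 4), action_dim=3)
#     state = torch.rand(84, 84, 4)          # single (H, W, C) observation
#     sentiment = torch.tensor([0.7, 0.9])   # (sentiment score, confidence)
#     q = net(state, sentiment)              # shape (1, 3)
#     q_plain = net(state)                   # sentiment branch zero-padded
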
class AdvancedTradingAgent:
    def __init__(self, state_dim, action_dim, learning_rate=0.001, use_sentiment=True):
        self.state_dim = state_dim  # Should be (84, 84, 4) or similar
        self.action_dim = action_dim
        self.learning_rate = learning_rate
        self.use_sentiment = use_sentiment
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {self.device}")

        # Neural network
        self.policy_net = EnhancedTradingNetwork(state_dim, action_dim).to(self.device)
        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=learning_rate)
        self.loss_fn = nn.MSELoss()

        # Experience replay
        self.memory = deque(maxlen=10000)  # Increased buffer size
        self.batch_size = min(32, state_dim[0] // 2)  # Dynamic batch size

        # Training parameters
        self.gamma = 0.99
        self.epsilon = 1.0
        self.epsilon_min = 0.01      # Lower exploration floor
        self.epsilon_decay = 0.9995  # Slower decay
        self.steps_done = 0
        self.target_update_freq = 100  # Target network update frequency
        self.steps_since_target_update = 0
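
    # Construction sketch (illustrative): for an 84x84x4 state the dynamic
    # batch size above works out to min(32, 84 // 2) = 32.
    #
    #     agent = AdvancedTradingAgent(state_dim=(84, 84, 4), action_dim=3)
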
    def select_action(self, state, current_sentiment=None, sentiment_confidence=None):
        """Select action with epsilon-greedy policy"""
        if random.random() < self.epsilon:
            return random.randint(0, self.action_dim - 1)

        try:
            # Validate and normalize state
            if not isinstance(state, np.ndarray):
                state = np.array(state)

            if state.dtype != np.float32:
                state = state.astype(np.float32)

            # Normalize pixel values
            if state.max() > 1.0:
                state = state / 255.0

            state_tensor = torch.FloatTensor(state).to(self.device)

            # Prepare sentiment input
            if self.use_sentiment and current_sentiment is not None:
                sentiment = np.array([float(current_sentiment), float(sentiment_confidence or 0.0)])
                sentiment_tensor = torch.FloatTensor(sentiment).to(self.device)
                with torch.no_grad():
                    q_values = self.policy_net(state_tensor, sentiment_tensor)
            else:
                with torch.no_grad():
                    q_values = self.policy_net(state_tensor)

            action = int(q_values.argmax().item())
            return action

        except Exception as e:
            print(f"Error in action selection: {e}")
            return random.randint(0, self.action_dim - 1)
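
    # Call sketch (illustrative): pass the raw observation plus the latest
    # sentiment reading; once epsilon reaches its floor this is almost always greedy.
    #
    #     action = agent.select_action(obs, current_sentiment=0.7, sentiment_confidence=0.9)
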
    def store_transition(self, state, action, reward, next_state, done, sentiment_data=None):
        """Store experience tuple safely"""
        try:
            # Ensure all inputs are numpy arrays
            if not isinstance(state, np.ndarray):
                state = np.array(state, dtype=np.float32)
            if not isinstance(next_state, np.ndarray):
                next_state = np.array(next_state, dtype=np.float32)

            # Normalize before storing
            if state.max() > 1.0:
                state = state / 255.0
            if next_state.max() > 1.0:
                next_state = next_state / 255.0

            # Handle sentiment data
            if sentiment_data is None:
                sentiment_data = {'sentiment': 0.5, 'confidence': 0.0}

            experience = (state, action, float(reward), next_state, bool(done), sentiment_data)
            self.memory.append(experience)

        except Exception as e:
            print(f"Error storing transition: {e}")
    def update(self):
        """DQN update with improved stability"""
        if len(self.memory) < self.batch_size:
            return 0.0

        try:
            batch = random.sample(self.memory, self.batch_size)
            states, actions, rewards, next_states, dones, sentiments = zip(*batch)

            # Convert to tensors
            states = np.stack(states)
            next_states = np.stack(next_states)
            actions = np.array(actions)
            rewards = np.array(rewards)
            dones = np.array(dones)

            states_tensor = torch.FloatTensor(states).to(self.device)
            next_states_tensor = torch.FloatTensor(next_states).to(self.device)
            actions_tensor = torch.LongTensor(actions).to(self.device)
            rewards_tensor = torch.FloatTensor(rewards).to(self.device)
            dones_tensor = torch.BoolTensor(dones).to(self.device)

            # Compute current Q values
            if self.use_sentiment:
                # Use sentiment from current state
                sentiment_batch = []
                for sentiment_data in sentiments:
                    sentiment = [sentiment_data.get('sentiment', 0.5),
                                 sentiment_data.get('confidence', 0.0)]
                    sentiment_batch.append(sentiment)
                sentiment_tensor = torch.FloatTensor(sentiment_batch).to(self.device)
                current_q = self.policy_net(states_tensor, sentiment_tensor)
            else:
                current_q = self.policy_net(states_tensor)

            current_q = current_q.gather(1, actions_tensor.unsqueeze(1)).squeeze(1)

            # Compute target Q values
            with torch.no_grad():
                if self.use_sentiment:
                    next_sentiment_batch = []
                    for sentiment_data in sentiments:
                        next_sentiment = [sentiment_data.get('sentiment', 0.5),
                                          sentiment_data.get('confidence', 0.0)]
                        next_sentiment_batch.append(next_sentiment)
                    next_sentiment_tensor = torch.FloatTensor(next_sentiment_batch).to(self.device)
                    next_q = self.policy_net(next_states_tensor, next_sentiment_tensor)
                else:
                    next_q = self.policy_net(next_states_tensor)

                next_q_max = next_q.max(1)[0]
                target_q = rewards_tensor + (self.gamma * next_q_max * ~dones_tensor)

            # Compute loss and optimize
            loss = self.loss_fn(current_q, target_q)

            self.optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
            self.optimizer.step()

            # Update epsilon
            if self.epsilon > self.epsilon_min:
                self.epsilon *= self.epsilon_decay

            self.steps_done += 1
            self.steps_since_target_update += 1

            # Update target network periodically (if implemented)
            if self.steps_since_target_update % self.target_update_freq == 0:
                self._update_target_network()

            return float(loss.item())

        except Exception as e:
            print(f"Error in update: {e}")
            import traceback
            traceback.print_exc()
            return 0.0
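
    # Training-loop sketch (illustrative, assuming a Gymnasium-style `env`):
    #
    #     obs, _ = env.reset()
    #     for step in range(num_steps):
    #         action = agent.select_action(obs)
    #         next_obs, reward, done, truncated, _ = env.step(action)
    #         agent.store_transition(obs, action, reward, next_obs, done)
    #         loss = agent.update()   # returns 0.0 until the buffer holds batch_size samples
    #         obs = env.reset()[0] if done else next_obs
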
    def _update_target_network(self):
        """Update target network (placeholder for double DQN)"""
        pass  # Implement target network update here
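
    # A minimal sketch of what the placeholder above could do, assuming a
    # `self.target_net` clone were added in __init__ (none exists yet):
    #
    #     self.target_net.load_state_dict(self.policy_net.state_dict())
    #
    # update() would then bootstrap next_q from the frozen target_net rather
    # than the online policy_net, which is the usual stabilization trick.
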
# Simple fallback network
class SimpleTradingNetwork(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(SimpleTradingNetwork, self).__init__()
        self.action_dim = action_dim

        self.conv_layers = nn.Sequential(
            nn.Conv2d(4, 16, kernel_size=4, stride=2),
            nn.ReLU(),
            # ... (unchanged layers not shown in the diff; output is 32 channels) ...
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((8, 8))
        )

        self.fc_layers = nn.Sequential(
            nn.Linear(32 * 8 * 8, 128),
            nn.ReLU(),
            # ... (unchanged layers not shown in the diff) ...
            nn.ReLU(),
            nn.Linear(64, action_dim)
        )

    def forward(self, x):
        try:
            if len(x.shape) == 3:
                x = x.unsqueeze(0)
            x = x.permute(0, 3, 1, 2).contiguous().float()

            x = self.conv_layers(x)
            x = x.reshape(x.size(0), -1)
            x = self.fc_layers(x)
            return x
        except Exception as e:
            print(f"Error in simple network: {e}")
            batch_size = x.size(0) if hasattr(x, 'size') else 1
            return torch.zeros(batch_size, self.action_dim, device=(x.device if hasattr(x, 'device') else 'cpu'))
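
A minimal end-to-end smoke test (illustrative only; it feeds random 84x84x4 observations rather than a real trading environment, and assumes the repository root is on sys.path):

    import numpy as np
    from src.agents.advanced_agent import AdvancedTradingAgent

    agent = AdvancedTradingAgent(state_dim=(84, 84, 4), action_dim=3)
    obs = np.random.rand(84, 84, 4).astype(np.float32)
    for _ in range(64):
        action = agent.select_action(obs, current_sentiment=0.6, sentiment_confidence=0.8)
        next_obs = np.random.rand(84, 84, 4).astype(np.float32)
        agent.store_transition(obs, action, 0.0, next_obs, False,
                               sentiment_data={'sentiment': 0.6, 'confidence': 0.8})
        obs = next_obs
    print(agent.update())  # finite loss once the buffer holds batch_size samples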
|