OmidSakaki committed on
Commit
de24124
·
verified ·
1 Parent(s): 8f54cbf

Update src/agents/visual_agent.py

Browse files
Files changed (1) hide show
  1. src/agents/visual_agent.py +74 -56
src/agents/visual_agent.py CHANGED
@@ -5,6 +5,52 @@ import numpy as np
5
  from collections import deque
6
  import random
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  class VisualTradingAgent:
9
  def __init__(self, state_dim, action_dim, learning_rate=0.001):
10
  self.state_dim = state_dim
@@ -13,7 +59,7 @@ class VisualTradingAgent:
13
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
14
  print(f"Using device: {self.device}")
15
 
16
- # Neural network - simplified for stability
17
  self.policy_net = SimpleTradingNetwork(state_dim, action_dim).to(self.device)
18
  self.optimizer = optim.Adam(self.policy_net.parameters(), lr=learning_rate)
19
 
@@ -26,6 +72,8 @@ class VisualTradingAgent:
26
  self.epsilon = 1.0
27
  self.epsilon_min = 0.1
28
  self.epsilon_decay = 0.995
 
 
29
 
30
  def select_action(self, state):
31
  """Select action using epsilon-greedy policy"""
@@ -39,19 +87,22 @@ class VisualTradingAgent:
39
 
40
  with torch.no_grad():
41
  q_values = self.policy_net(state_tensor)
42
- return q_values.argmax().item()
43
  except Exception as e:
44
  print(f"Error in action selection: {e}")
45
  return random.randint(0, self.action_dim - 1)
46
 
47
  def store_transition(self, state, action, reward, next_state, done):
48
  """Store experience in replay memory"""
49
- self.memory.append((state, action, reward, next_state, done))
 
 
 
50
 
51
  def update(self):
52
  """Update the neural network"""
53
  if len(self.memory) < self.batch_size:
54
- return 0
55
 
56
  try:
57
  # Sample batch from memory
@@ -59,19 +110,22 @@ class VisualTradingAgent:
59
  states, actions, rewards, next_states, dones = zip(*batch)
60
 
61
  # Convert to tensors with normalization
62
- states = torch.FloatTensor(np.array(states)).to(self.device) / 255.0
63
- actions = torch.LongTensor(actions).to(self.device)
64
- rewards = torch.FloatTensor(rewards).to(self.device)
65
- next_states = torch.FloatTensor(np.array(next_states)).to(self.device) / 255.0
66
- dones = torch.BoolTensor(dones).to(self.device)
 
 
 
67
 
68
  # Current Q values
69
- current_q = self.policy_net(states).gather(1, actions.unsqueeze(1))
70
 
71
  # Next Q values
72
  with torch.no_grad():
73
- next_q = self.policy_net(next_states).max(1)[0]
74
- target_q = rewards + (self.gamma * next_q * ~dones)
75
 
76
  # Compute loss
77
  loss = nn.MSELoss()(current_q.squeeze(), target_q)
@@ -84,52 +138,16 @@ class VisualTradingAgent:
84
  torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
85
  self.optimizer.step()
86
 
87
- # Decay epsilon
 
 
 
 
 
88
  self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
89
 
90
- return loss.item()
91
 
92
  except Exception as e:
93
  print(f"Error in update: {e}")
94
- return 0
95
-
96
- class SimpleTradingNetwork(nn.Module):
97
- def __init__(self, state_dim, action_dim):
98
- super(SimpleTradingNetwork, self).__init__()
99
-
100
- # Simplified CNN for faster training
101
- self.conv_layers = nn.Sequential(
102
- nn.Conv2d(4, 16, kernel_size=4, stride=2), # Input: 84x84x4
103
- nn.ReLU(),
104
- nn.Conv2d(16, 32, kernel_size=4, stride=2), # 41x41x16 -> 19x19x32
105
- nn.ReLU(),
106
- nn.Conv2d(32, 32, kernel_size=3, stride=1), # 19x19x32 -> 17x17x32
107
- nn.ReLU(),
108
- nn.AdaptiveAvgPool2d((8, 8)) # 17x17x32 -> 8x8x32
109
- )
110
-
111
- # Calculate flattened size
112
- self.flattened_size = 32 * 8 * 8
113
-
114
- # Fully connected layers
115
- self.fc_layers = nn.Sequential(
116
- nn.Linear(self.flattened_size, 128),
117
- nn.ReLU(),
118
- nn.Dropout(0.2),
119
- nn.Linear(128, 64),
120
- nn.ReLU(),
121
- nn.Dropout(0.2),
122
- nn.Linear(64, action_dim)
123
- )
124
-
125
- def forward(self, x):
126
- # x shape: (batch_size, 84, 84, 4) -> (batch_size, 4, 84, 84)
127
- if len(x.shape) == 4: # Single observation
128
- x = x.permute(0, 3, 1, 2)
129
- else: # Batch of observations
130
- x = x.permute(0, 3, 1, 2)
131
-
132
- x = self.conv_layers(x)
133
- x = x.view(x.size(0), -1)
134
- x = self.fc_layers(x)
135
- return x
 
5
  from collections import deque
6
  import random
7
 
8
class SimpleTradingNetwork(nn.Module):
    """Small CNN Q-network mapping stacked frames to per-action Q-values.

    Input is expected in NHWC layout -- presumably (batch, 84, 84, 4) frame
    stacks, judging by the conv comments; TODO confirm against the caller.

    Args:
        state_dim: unused by the layers; kept for interface compatibility.
        action_dim: number of discrete actions (size of the output layer).
    """

    def __init__(self, state_dim, action_dim):
        super(SimpleTradingNetwork, self).__init__()

        # Simplified CNN for faster training.
        self.conv_layers = nn.Sequential(
            nn.Conv2d(4, 16, kernel_size=4, stride=2),   # 84x84x4  -> 41x41x16
            nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=4, stride=2),  # 41x41x16 -> 19x19x32
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1),  # 19x19x32 -> 17x17x32
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((8, 8)),                # 17x17x32 -> 8x8x32
        )

        # Adaptive pooling fixes the spatial size, so the flattened width is
        # constant regardless of the exact input resolution.
        self.flattened_size = 32 * 8 * 8

        # Fully connected head producing one Q-value per action.
        self.fc_layers = nn.Sequential(
            nn.Linear(self.flattened_size, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, action_dim),
        )

    def forward(self, x):
        """Return Q-values of shape (batch, action_dim) for NHWC input `x`.

        Accepts either a batch (B, H, W, 4) or a single unbatched (H, W, 4)
        observation; the latter is promoted to a batch of one.
        """
        try:
            # BUGFIX: the original branched on len(x.shape) == 4 vs else, but
            # both branches applied the identical permute, so an unbatched 3-D
            # observation always errored into the fallback. Promote it instead.
            if x.dim() == 3:
                x = x.unsqueeze(0)

            x = x.permute(0, 3, 1, 2)  # NHWC -> NCHW for Conv2d
            x = self.conv_layers(x)
            x = x.view(x.size(0), -1)
            return self.fc_layers(x)
        except Exception as e:
            # Best-effort fallback kept from the original, but the zeros are
            # now created on the input's device/dtype -- the original returned
            # CPU float32 zeros, which raises a device-mismatch error
            # downstream whenever the model runs on CUDA.
            print(f"Error in network forward: {e}")
            batch = x.size(0) if x.dim() >= 1 else 1
            return torch.zeros(
                (batch, self.fc_layers[-1].out_features),
                device=x.device,
                dtype=x.dtype,
            )
53
+
54
  class VisualTradingAgent:
55
  def __init__(self, state_dim, action_dim, learning_rate=0.001):
56
  self.state_dim = state_dim
 
59
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
60
  print(f"Using device: {self.device}")
61
 
62
+ # Neural network
63
  self.policy_net = SimpleTradingNetwork(state_dim, action_dim).to(self.device)
64
  self.optimizer = optim.Adam(self.policy_net.parameters(), lr=learning_rate)
65
 
 
72
  self.epsilon = 1.0
73
  self.epsilon_min = 0.1
74
  self.epsilon_decay = 0.995
75
+ self.update_target_every = 100
76
+ self.steps_done = 0
77
 
78
  def select_action(self, state):
79
  """Select action using epsilon-greedy policy"""
 
87
 
88
  with torch.no_grad():
89
  q_values = self.policy_net(state_tensor)
90
+ return int(q_values.argmax().item())
91
  except Exception as e:
92
  print(f"Error in action selection: {e}")
93
  return random.randint(0, self.action_dim - 1)
94
 
95
  def store_transition(self, state, action, reward, next_state, done):
96
  """Store experience in replay memory"""
97
+ try:
98
+ self.memory.append((state, action, reward, next_state, done))
99
+ except Exception as e:
100
+ print(f"Error storing transition: {e}")
101
 
102
  def update(self):
103
  """Update the neural network"""
104
  if len(self.memory) < self.batch_size:
105
+ return 0.0
106
 
107
  try:
108
  # Sample batch from memory
 
110
  states, actions, rewards, next_states, dones = zip(*batch)
111
 
112
  # Convert to tensors with normalization
113
+ states_array = np.array(states, dtype=np.float32) / 255.0
114
+ next_states_array = np.array(next_states, dtype=np.float32) / 255.0
115
+
116
+ states_tensor = torch.FloatTensor(states_array).to(self.device)
117
+ actions_tensor = torch.LongTensor(actions).to(self.device)
118
+ rewards_tensor = torch.FloatTensor(rewards).to(self.device)
119
+ next_states_tensor = torch.FloatTensor(next_states_array).to(self.device)
120
+ dones_tensor = torch.BoolTensor(dones).to(self.device)
121
 
122
  # Current Q values
123
+ current_q = self.policy_net(states_tensor).gather(1, actions_tensor.unsqueeze(1))
124
 
125
  # Next Q values
126
  with torch.no_grad():
127
+ next_q = self.policy_net(next_states_tensor).max(1)[0]
128
+ target_q = rewards_tensor + (self.gamma * next_q * ~dones_tensor)
129
 
130
  # Compute loss
131
  loss = nn.MSELoss()(current_q.squeeze(), target_q)
 
138
  torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
139
  self.optimizer.step()
140
 
141
+ # Update steps and decay epsilon
142
+ self.steps_done += 1
143
+ if self.steps_done % self.update_target_every == 0:
144
+ # For simplicity, we're using the same network
145
+ pass
146
+
147
  self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
148
 
149
+ return float(loss.item())
150
 
151
  except Exception as e:
152
  print(f"Error in update: {e}")
153
+ return 0.0