Spaces:

OmidSakaki
/

VisualTradingAI

Sleeping

App Files Files Community

OmidSakaki committed on Oct 15, 2025

Commit

1f5a715

verified ·

1 Parent(s): 208b262

Update src/agents/visual_agent.py

Browse files

Files changed (1) hide show

src/agents/visual_agent.py +30 -20

src/agents/visual_agent.py CHANGED Viewed

@@ -13,18 +13,18 @@ class VisualTradingAgent:
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         print(f"Using device: {self.device}")
-        # Neural network
-        self.policy_net = TradingCNN(state_dim, action_dim).to(self.device)
         self.optimizer = optim.Adam(self.policy_net.parameters(), lr=learning_rate)
         # Experience replay
-        self.memory = deque(maxlen=1000)
-        self.batch_size = 32
         # Training parameters
         self.gamma = 0.99
         self.epsilon = 1.0
-        self.epsilon_min = 0.01
         self.epsilon_decay = 0.995
     def select_action(self, state):
@@ -33,11 +33,15 @@ class VisualTradingAgent:
             return random.randint(0, self.action_dim - 1)
         try:
-            state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
             with torch.no_grad():
                 q_values = self.policy_net(state_tensor)
             return q_values.argmax().item()
-        except:
             return random.randint(0, self.action_dim - 1)
     def store_transition(self, state, action, reward, next_state, done):
@@ -54,11 +58,11 @@ class VisualTradingAgent:
             batch = random.sample(self.memory, self.batch_size)
             states, actions, rewards, next_states, dones = zip(*batch)
-            # Convert to tensors
-            states = torch.FloatTensor(np.array(states)).to(self.device)
             actions = torch.LongTensor(actions).to(self.device)
             rewards = torch.FloatTensor(rewards).to(self.device)
-            next_states = torch.FloatTensor(np.array(next_states)).to(self.device)
             dones = torch.BoolTensor(dones).to(self.device)
             # Current Q values
@@ -75,6 +79,9 @@ class VisualTradingAgent:
             # Optimize
             self.optimizer.zero_grad()
             loss.backward()
             self.optimizer.step()
             # Decay epsilon
@@ -86,30 +93,33 @@ class VisualTradingAgent:
             print(f"Error in update: {e}")
             return 0
-class TradingCNN(nn.Module):
     def __init__(self, state_dim, action_dim):
-        super(TradingCNN, self).__init__()
-        # CNN for visual processing
         self.conv_layers = nn.Sequential(
-            nn.Conv2d(4, 32, kernel_size=8, stride=4),
             nn.ReLU(),
-            nn.Conv2d(32, 64, kernel_size=4, stride=2),
             nn.ReLU(),
-            nn.Conv2d(64, 64, kernel_size=3, stride=1),
             nn.ReLU(),
-            nn.AdaptiveAvgPool2d((6, 6))
         )
         # Fully connected layers
         self.fc_layers = nn.Sequential(
-            nn.Linear(64 * 6 * 6, 512),
             nn.ReLU(),
             nn.Dropout(0.2),
-            nn.Linear(512, 256),
             nn.ReLU(),
             nn.Dropout(0.2),
-            nn.Linear(256, action_dim)
         )
     def forward(self, x):

         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         print(f"Using device: {self.device}")
+        # Neural network - simplified for stability
+        self.policy_net = SimpleTradingNetwork(state_dim, action_dim).to(self.device)
         self.optimizer = optim.Adam(self.policy_net.parameters(), lr=learning_rate)
         # Experience replay
+        self.memory = deque(maxlen=500)  # Smaller memory for stability
+        self.batch_size = 16
         # Training parameters
         self.gamma = 0.99
         self.epsilon = 1.0
+        self.epsilon_min = 0.1
         self.epsilon_decay = 0.995
     def select_action(self, state):
             return random.randint(0, self.action_dim - 1)
         try:
+            # Normalize state and convert to tensor
+            state_normalized = state.astype(np.float32) / 255.0
+            state_tensor = torch.FloatTensor(state_normalized).unsqueeze(0).to(self.device)
             with torch.no_grad():
                 q_values = self.policy_net(state_tensor)
             return q_values.argmax().item()
+        except Exception as e:
+            print(f"Error in action selection: {e}")
             return random.randint(0, self.action_dim - 1)
     def store_transition(self, state, action, reward, next_state, done):
             batch = random.sample(self.memory, self.batch_size)
             states, actions, rewards, next_states, dones = zip(*batch)
+            # Convert to tensors with normalization
+            states = torch.FloatTensor(np.array(states)).to(self.device) / 255.0
             actions = torch.LongTensor(actions).to(self.device)
             rewards = torch.FloatTensor(rewards).to(self.device)
+            next_states = torch.FloatTensor(np.array(next_states)).to(self.device) / 255.0
             dones = torch.BoolTensor(dones).to(self.device)
             # Current Q values
             # Optimize
             self.optimizer.zero_grad()
             loss.backward()
+            # Gradient clipping for stability
+            torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
             self.optimizer.step()
             # Decay epsilon
             print(f"Error in update: {e}")
             return 0
+class SimpleTradingNetwork(nn.Module):
     def __init__(self, state_dim, action_dim):
+        super(SimpleTradingNetwork, self).__init__()
+        # Simplified CNN for faster training
         self.conv_layers = nn.Sequential(
+            nn.Conv2d(4, 16, kernel_size=4, stride=2),  # Input: 84x84x4
             nn.ReLU(),
+            nn.Conv2d(16, 32, kernel_size=4, stride=2), # 41x41x16 -> 19x19x32
             nn.ReLU(),
+            nn.Conv2d(32, 32, kernel_size=3, stride=1), # 19x19x32 -> 17x17x32
             nn.ReLU(),
+            nn.AdaptiveAvgPool2d((8, 8))  # 17x17x32 -> 8x8x32
         )
+        # Calculate flattened size
+        self.flattened_size = 32 * 8 * 8
         # Fully connected layers
         self.fc_layers = nn.Sequential(
+            nn.Linear(self.flattened_size, 128),
             nn.ReLU(),
             nn.Dropout(0.2),
+            nn.Linear(128, 64),
             nn.ReLU(),
             nn.Dropout(0.2),
+            nn.Linear(64, action_dim)
         )
     def forward(self, x):