OmidSakaki committed on
Commit
5a78d94
·
verified ·
1 Parent(s): 3457ff1

Update src/agents/visual_agent.py

Browse files
Files changed (1) hide show
  1. src/agents/visual_agent.py +50 -45
src/agents/visual_agent.py CHANGED
@@ -11,14 +11,14 @@ class VisualTradingAgent:
11
  self.action_dim = action_dim
12
  self.learning_rate = learning_rate
13
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
14
 
15
  # Neural network
16
  self.policy_net = TradingCNN(state_dim, action_dim).to(self.device)
17
- self.target_net = TradingCNN(state_dim, action_dim).to(self.device)
18
  self.optimizer = optim.Adam(self.policy_net.parameters(), lr=learning_rate)
19
 
20
  # Experience replay
21
- self.memory = deque(maxlen=10000)
22
  self.batch_size = 32
23
 
24
  # Training parameters
@@ -26,18 +26,19 @@ class VisualTradingAgent:
26
  self.epsilon = 1.0
27
  self.epsilon_min = 0.01
28
  self.epsilon_decay = 0.995
29
- self.update_target_every = 1000
30
- self.steps_done = 0
31
 
32
  def select_action(self, state):
33
  """Select action using epsilon-greedy policy"""
34
  if random.random() < self.epsilon:
35
  return random.randint(0, self.action_dim - 1)
36
 
37
- state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
38
- with torch.no_grad():
39
- q_values = self.policy_net(state_tensor)
40
- return q_values.argmax().item()
 
 
 
41
 
42
  def store_transition(self, state, action, reward, next_state, done):
43
  """Store experience in replay memory"""
@@ -48,42 +49,42 @@ class VisualTradingAgent:
48
  if len(self.memory) < self.batch_size:
49
  return 0
50
 
51
- # Sample batch from memory
52
- batch = random.sample(self.memory, self.batch_size)
53
- states, actions, rewards, next_states, dones = zip(*batch)
54
-
55
- # Convert to tensors
56
- states = torch.FloatTensor(np.array(states)).to(self.device)
57
- actions = torch.LongTensor(actions).to(self.device)
58
- rewards = torch.FloatTensor(rewards).to(self.device)
59
- next_states = torch.FloatTensor(np.array(next_states)).to(self.device)
60
- dones = torch.BoolTensor(dones).to(self.device)
61
-
62
- # Current Q values
63
- current_q = self.policy_net(states).gather(1, actions.unsqueeze(1))
64
-
65
- # Next Q values
66
- with torch.no_grad():
67
- next_q = self.target_net(next_states).max(1)[0]
68
- target_q = rewards + (self.gamma * next_q * ~dones)
69
-
70
- # Compute loss
71
- loss = nn.MSELoss()(current_q.squeeze(), target_q)
72
-
73
- # Optimize
74
- self.optimizer.zero_grad()
75
- loss.backward()
76
- self.optimizer.step()
77
-
78
- # Update target network
79
- self.steps_done += 1
80
- if self.steps_done % self.update_target_every == 0:
81
- self.target_net.load_state_dict(self.policy_net.state_dict())
82
-
83
- # Decay epsilon
84
- self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
85
-
86
- return loss.item()
87
 
88
  class TradingCNN(nn.Module):
89
  def __init__(self, state_dim, action_dim):
@@ -113,7 +114,11 @@ class TradingCNN(nn.Module):
113
 
114
  def forward(self, x):
115
  # x shape: (batch_size, 84, 84, 4) -> (batch_size, 4, 84, 84)
116
- x = x.permute(0, 3, 1, 2)
 
 
 
 
117
  x = self.conv_layers(x)
118
  x = x.view(x.size(0), -1)
119
  x = self.fc_layers(x)
 
11
  self.action_dim = action_dim
12
  self.learning_rate = learning_rate
13
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
14
+ print(f"Using device: {self.device}")
15
 
16
  # Neural network
17
  self.policy_net = TradingCNN(state_dim, action_dim).to(self.device)
 
18
  self.optimizer = optim.Adam(self.policy_net.parameters(), lr=learning_rate)
19
 
20
  # Experience replay
21
+ self.memory = deque(maxlen=1000)
22
  self.batch_size = 32
23
 
24
  # Training parameters
 
26
  self.epsilon = 1.0
27
  self.epsilon_min = 0.01
28
  self.epsilon_decay = 0.995
 
 
29
 
30
  def select_action(self, state):
31
  """Select action using epsilon-greedy policy"""
32
  if random.random() < self.epsilon:
33
  return random.randint(0, self.action_dim - 1)
34
 
35
+ try:
36
+ state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
37
+ with torch.no_grad():
38
+ q_values = self.policy_net(state_tensor)
39
+ return q_values.argmax().item()
40
+ except:
41
+ return random.randint(0, self.action_dim - 1)
42
 
43
  def store_transition(self, state, action, reward, next_state, done):
44
  """Store experience in replay memory"""
 
49
  if len(self.memory) < self.batch_size:
50
  return 0
51
 
52
+ try:
53
+ # Sample batch from memory
54
+ batch = random.sample(self.memory, self.batch_size)
55
+ states, actions, rewards, next_states, dones = zip(*batch)
56
+
57
+ # Convert to tensors
58
+ states = torch.FloatTensor(np.array(states)).to(self.device)
59
+ actions = torch.LongTensor(actions).to(self.device)
60
+ rewards = torch.FloatTensor(rewards).to(self.device)
61
+ next_states = torch.FloatTensor(np.array(next_states)).to(self.device)
62
+ dones = torch.BoolTensor(dones).to(self.device)
63
+
64
+ # Current Q values
65
+ current_q = self.policy_net(states).gather(1, actions.unsqueeze(1))
66
+
67
+ # Next Q values
68
+ with torch.no_grad():
69
+ next_q = self.policy_net(next_states).max(1)[0]
70
+ target_q = rewards + (self.gamma * next_q * ~dones)
71
+
72
+ # Compute loss
73
+ loss = nn.MSELoss()(current_q.squeeze(), target_q)
74
+
75
+ # Optimize
76
+ self.optimizer.zero_grad()
77
+ loss.backward()
78
+ self.optimizer.step()
79
+
80
+ # Decay epsilon
81
+ self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
82
+
83
+ return loss.item()
84
+
85
+ except Exception as e:
86
+ print(f"Error in update: {e}")
87
+ return 0
88
 
89
  class TradingCNN(nn.Module):
90
  def __init__(self, state_dim, action_dim):
 
114
 
115
  def forward(self, x):
116
  # x shape: (batch_size, 84, 84, 4) -> (batch_size, 4, 84, 84)
117
+ if len(x.shape) == 4: # Single observation
118
+ x = x.permute(0, 3, 1, 2)
119
+ else: # Batch of observations
120
+ x = x.permute(0, 3, 1, 2)
121
+
122
  x = self.conv_layers(x)
123
  x = x.view(x.size(0), -1)
124
  x = self.fc_layers(x)