OmidSakaki committed on
Commit
cd19213
·
verified ·
1 Parent(s): 144fc70

Update src/environments/advanced_trading_env.py

Browse files
Files changed (1) hide show
  1. src/environments/advanced_trading_env.py +259 -103
src/environments/advanced_trading_env.py CHANGED
@@ -1,137 +1,293 @@
1
  import numpy as np
 
 
2
  from .visual_trading_env import VisualTradingEnvironment
3
  from src.sentiment.twitter_analyzer import AdvancedSentimentAnalyzer
4
- from typing import Dict, Any
 
 
 
5
 
6
  class AdvancedTradingEnvironment(VisualTradingEnvironment):
7
- def __init__(self, initial_balance=10000, risk_level="Medium", asset_type="Crypto",
8
- use_sentiment=True, sentiment_influence=0.3):
9
  super().__init__(initial_balance, risk_level, asset_type)
10
-
 
 
 
 
 
 
11
  self.use_sentiment = use_sentiment
12
- self.sentiment_influence = sentiment_influence # How much sentiment affects decisions
13
- self.sentiment_history = []
14
- self.sentiment_window = 20
 
 
 
 
 
 
15
 
16
  if use_sentiment:
17
- self.sentiment_analyzer = AdvancedSentimentAnalyzer()
18
- self.sentiment_analyzer.initialize_models()
19
- self.current_sentiment = 0.5
20
- self.sentiment_confidence = 0.0
21
-
22
- def step(self, action):
 
 
 
23
  """Execute trading step with sentiment influence"""
24
- # Get market sentiment before executing action
25
- if self.use_sentiment and self.current_step % 5 == 0: # Update sentiment every 5 steps
 
 
 
 
26
  self._update_sentiment()
27
 
28
- # Execute the original step
29
- observation, reward, done, info = super().step(action)
30
 
31
- # Enhance reward with sentiment analysis
 
 
 
 
 
 
 
 
 
 
 
32
  if self.use_sentiment:
33
- reward = self._apply_sentiment_to_reward(reward, action, info)
 
 
 
34
 
35
- # Add sentiment info to the observation
36
  enhanced_observation = self._enhance_observation(observation)
37
 
38
- # Add sentiment data to info
39
- info['sentiment'] = self.current_sentiment
40
- info['sentiment_confidence'] = self.sentiment_confidence
41
- info['sentiment_influence'] = self.sentiment_influence
 
 
 
42
 
43
- return enhanced_observation, reward, done, info
44
-
45
  def _update_sentiment(self):
46
- """Update current market sentiment"""
 
 
 
47
  try:
48
  sentiment_data = self.sentiment_analyzer.get_influencer_sentiment()
49
- self.current_sentiment = sentiment_data['market_sentiment']
50
- self.sentiment_confidence = sentiment_data['confidence']
51
 
52
- # Update sentiment history
53
- self.sentiment_history.append(self.current_sentiment)
54
- if len(self.sentiment_history) > self.sentiment_window:
55
- self.sentiment_history.pop(0)
56
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  except Exception as e:
58
- print(f"Error updating sentiment: {e}")
 
59
  self.current_sentiment = 0.5
60
  self.sentiment_confidence = 0.0
61
-
62
- def _apply_sentiment_to_reward(self, original_reward: float, action: int, info: Dict) -> float:
63
- """Modify reward based on sentiment analysis"""
64
- if self.sentiment_confidence < 0.3: # Low confidence, minimal influence
 
 
 
 
 
 
65
  return original_reward
66
 
67
- sentiment_multiplier = 1.0
68
-
69
- # Bullish sentiment should reward buying actions
70
- if self.current_sentiment > 0.6: # Bullish
71
- if action == 1: # Buy
72
- sentiment_multiplier += self.sentiment_influence * self.sentiment_confidence
73
- elif action == 2: # Sell (increase position)
74
- sentiment_multiplier += self.sentiment_influence * 0.5 * self.sentiment_confidence
75
- elif action == 3: # Close (might miss opportunity)
76
- sentiment_multiplier -= self.sentiment_influence * 0.3 * self.sentiment_confidence
77
-
78
- # Bearish sentiment should reward selling/closing actions
79
- elif self.current_sentiment < 0.4: # Bearish
80
- if action == 3: # Close position
81
- sentiment_multiplier += self.sentiment_influence * self.sentiment_confidence
82
- elif action == 1: # Buy (might be risky)
83
- sentiment_multiplier -= self.sentiment_influence * 0.5 * self.sentiment_confidence
84
-
85
- # Apply sentiment trend momentum
86
- if len(self.sentiment_history) > 5:
87
- recent_trend = np.mean(self.sentiment_history[-5:]) - np.mean(self.sentiment_history[-10:-5])
88
- trend_influence = recent_trend * self.sentiment_influence * 0.5
89
- sentiment_multiplier += trend_influence
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
- enhanced_reward = original_reward * sentiment_multiplier
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
- # Ensure reward doesn't become too extreme
94
- max_reward = abs(original_reward) * 3
95
- return np.clip(enhanced_reward, -max_reward, max_reward)
96
-
97
- def _enhance_observation(self, original_observation):
98
- """Add sentiment data to observation"""
99
- # For simplicity, we'll keep the original visual observation
100
- # In a more advanced implementation, we could encode sentiment in the image
101
- return original_observation
102
-
103
- def get_sentiment_analysis(self) -> Dict:
104
- """Get detailed sentiment analysis"""
 
 
 
 
 
 
 
 
 
105
  if not self.use_sentiment:
106
- return {"error": "Sentiment analysis disabled"}
107
-
108
- return {
109
- "current_sentiment": self.current_sentiment,
110
- "sentiment_confidence": self.sentiment_confidence,
111
- "sentiment_trend": self._calculate_sentiment_trend(),
112
- "influence_level": self.sentiment_influence,
113
- "history_length": len(self.sentiment_history)
114
- }
115
-
116
- def _calculate_sentiment_trend(self) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
117
  """Calculate sentiment trend direction"""
118
  if len(self.sentiment_history) < 5:
119
- return "neutral"
120
-
121
- recent = np.mean(self.sentiment_history[-5:])
122
- previous = np.mean(self.sentiment_history[-10:-5]) if len(self.sentiment_history) >= 10 else recent
123
 
124
- if recent > previous + 0.1:
125
- return "improving"
126
- elif recent < previous - 0.1:
127
- return "deteriorating"
128
- else:
129
- return "stable"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
- def reset(self):
132
- """Reset environment including sentiment data"""
133
- observation = super().reset()
134
- self.sentiment_history = []
135
- self.current_sentiment = 0.5
136
- self.sentiment_confidence = 0.0
137
- return observation
 
import logging
# FIX: `deque` is used for `sentiment_history` below but was never imported,
# which made AdvancedTradingEnvironment raise NameError on construction.
from collections import deque
from typing import Dict, Any, Optional, Tuple

import numpy as np

from .visual_trading_env import VisualTradingEnvironment
from src.sentiment.twitter_analyzer import AdvancedSentimentAnalyzer

# Setup logging.
# NOTE(review): basicConfig() at import time reconfigures the process-wide
# root logger; fine for a standalone app, but an embedding application may
# prefer to own logging configuration itself.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
 
11
class AdvancedTradingEnvironment(VisualTradingEnvironment):
    """Visual trading environment enriched with Twitter influencer sentiment.

    Periodically polls an ``AdvancedSentimentAnalyzer`` and uses the result to
    scale rewards. If the analyzer cannot be set up, sentiment is disabled and
    the environment behaves like the plain visual one.
    """

    def __init__(self, initial_balance=10000, risk_level="Medium", asset_type="Crypto",
                 use_sentiment=True, sentiment_influence=0.3, sentiment_update_freq=5):
        super().__init__(initial_balance, risk_level, asset_type)

        # Fail fast on nonsensical configuration.
        if not 0.0 <= sentiment_influence <= 1.0:
            raise ValueError("sentiment_influence must be between 0.0 and 1.0")
        if sentiment_update_freq < 1:
            raise ValueError("sentiment_update_freq must be at least 1")

        self.use_sentiment = use_sentiment
        self.sentiment_influence = sentiment_influence
        self.sentiment_update_freq = sentiment_update_freq
        self.sentiment_history = deque(maxlen=100)  # bounded sample history
        self.current_step = 0

        # Neutral defaults so these attributes always exist, sentiment or not.
        self.sentiment_analyzer = None
        self.current_sentiment = 0.5
        self.sentiment_confidence = 0.0

        if use_sentiment:
            try:
                self.sentiment_analyzer = AdvancedSentimentAnalyzer()
                self.sentiment_analyzer.initialize_models()
                logger.info("Sentiment analyzer initialized successfully")
            except Exception as e:
                # Degrade gracefully instead of failing construction.
                logger.warning(f"Failed to initialize sentiment analyzer: {e}. Disabling sentiment.")
                self.use_sentiment = False
42
+ def step(self, action: int) -> Tuple[np.ndarray, float, bool, Dict[str, Any]]:
43
  """Execute trading step with sentiment influence"""
44
+ if not isinstance(action, int) or action < 0:
45
+ logger.warning(f"Invalid action {action}, defaulting to hold")
46
+ action = 0 # Hold action as default
47
+
48
+ # Update sentiment periodically
49
+ if self.use_sentiment and self.current_step % self.sentiment_update_freq == 0:
50
  self._update_sentiment()
51
 
52
+ self.current_step += 1
 
53
 
54
+ # Execute base environment step
55
+ try:
56
+ observation, reward, done, info = super().step(action)
57
+ except Exception as e:
58
+ logger.error(f"Error in base environment step: {e}")
59
+ # Return safe defaults
60
+ observation = self._get_safe_observation()
61
+ reward = 0.0
62
+ done = False
63
+ info = {}
64
+
65
+ # Apply sentiment modification to reward
66
  if self.use_sentiment:
67
+ try:
68
+ reward = self._apply_sentiment_to_reward(reward, action, info)
69
+ except Exception as e:
70
+ logger.warning(f"Error applying sentiment to reward: {e}")
71
 
72
+ # Enhance observation with sentiment (optional)
73
  enhanced_observation = self._enhance_observation(observation)
74
 
75
+ # Add sentiment info to info dict
76
+ info.update({
77
+ 'sentiment': float(self.current_sentiment),
78
+ 'sentiment_confidence': float(self.sentiment_confidence),
79
+ 'sentiment_influence': float(self.sentiment_influence),
80
+ 'step': self.current_step
81
+ })
82
 
83
+ return enhanced_observation, float(reward), bool(done), info
84
+
85
  def _update_sentiment(self):
86
+ """Update current market sentiment with robust error handling"""
87
+ if not self.sentiment_analyzer:
88
+ return
89
+
90
  try:
91
  sentiment_data = self.sentiment_analyzer.get_influencer_sentiment()
 
 
92
 
93
+ # Validate sentiment data
94
+ if not isinstance(sentiment_data, dict):
95
+ raise ValueError("Invalid sentiment data format")
96
+
97
+ market_sentiment = sentiment_data.get('market_sentiment')
98
+ confidence = sentiment_data.get('confidence')
99
+
100
+ if market_sentiment is None or not (-1.0 <= market_sentiment <= 1.0):
101
+ raise ValueError("Invalid market_sentiment value")
102
+ if confidence is None or not (0.0 <= confidence <= 1.0):
103
+ raise ValueError("Invalid confidence value")
104
+
105
+ self.current_sentiment = float(market_sentiment)
106
+ self.sentiment_confidence = float(confidence)
107
+
108
+ # Normalize sentiment to 0-1 range for consistency
109
+ self.current_sentiment = (self.current_sentiment + 1.0) / 2.0
110
+
111
+ # Update history
112
+ self.sentiment_history.append({
113
+ 'sentiment': self.current_sentiment,
114
+ 'confidence': self.sentiment_confidence,
115
+ 'timestamp': self.current_step
116
+ })
117
+
118
+ logger.debug(f"Updated sentiment: {self.current_sentiment:.3f} (conf: {self.sentiment_confidence:.3f})")
119
+
120
  except Exception as e:
121
+ logger.warning(f"Error updating sentiment: {e}")
122
+ # Fallback to neutral sentiment
123
  self.current_sentiment = 0.5
124
  self.sentiment_confidence = 0.0
125
+ self.sentiment_history.append({
126
+ 'sentiment': 0.5,
127
+ 'confidence': 0.0,
128
+ 'timestamp': self.current_step
129
+ })
130
+
131
+ def _apply_sentiment_to_reward(self, original_reward: float, action: int,
132
+ info: Dict[str, Any]) -> float:
133
+ """Modify reward based on sentiment analysis with bounds checking"""
134
+ if self.sentiment_confidence < 0.3:
135
  return original_reward
136
 
137
+ try:
138
+ sentiment_multiplier = 1.0
139
+ sentiment_score = self.current_sentiment # 0-1 normalized
140
+
141
+ # Define action mappings (adjust based on your action space)
142
+ # Assuming: 0=hold, 1=buy, 2=sell, 3=close
143
+ bullish_threshold = 0.6
144
+ bearish_threshold = 0.4
145
+
146
+ if sentiment_score > bullish_threshold: # Bullish
147
+ if action == 1: # Buy
148
+ sentiment_multiplier += self.sentiment_influence * self.sentiment_confidence
149
+ elif action == 2: # Sell short
150
+ sentiment_multiplier -= self.sentiment_influence * 0.3 * self.sentiment_confidence
151
+ elif action == 3: # Close
152
+ sentiment_multiplier -= self.sentiment_influence * 0.2 * self.sentiment_confidence
153
+
154
+ elif sentiment_score < bearish_threshold: # Bearish
155
+ if action == 2: # Sell short
156
+ sentiment_multiplier += self.sentiment_influence * self.sentiment_confidence
157
+ elif action == 1: # Buy
158
+ sentiment_multiplier -= self.sentiment_influence * 0.5 * self.sentiment_confidence
159
+ elif action == 3: # Close
160
+ sentiment_multiplier += self.sentiment_influence * 0.3 * self.sentiment_confidence
161
+
162
+ # Apply trend momentum if enough history
163
+ trend_multiplier = self._calculate_sentiment_trend_multiplier()
164
+ sentiment_multiplier += trend_multiplier
165
+
166
+ # Clamp multiplier to reasonable bounds
167
+ sentiment_multiplier = np.clip(sentiment_multiplier, 0.5, 2.0)
168
+
169
+ enhanced_reward = original_reward * sentiment_multiplier
170
+
171
+ # Ensure reward doesn't become extreme
172
+ max_reward = abs(original_reward) * 2.5 if original_reward != 0 else 10.0
173
+ return np.clip(enhanced_reward, -max_reward, max_reward)
174
+
175
+ except Exception as e:
176
+ logger.error(f"Error in sentiment reward calculation: {e}")
177
+ return original_reward
178
+
179
+ def _calculate_sentiment_trend_multiplier(self) -> float:
180
+ """Calculate trend-based multiplier from sentiment history"""
181
+ if len(self.sentiment_history) < 10:
182
+ return 0.0
183
 
184
+ try:
185
+ # Get recent and previous sentiment values
186
+ recent_sentiments = [h['sentiment'] for h in list(self.sentiment_history)[-5:]]
187
+ prev_sentiments = [h['sentiment'] for h in list(self.sentiment_history)[-10:-5]]
188
+
189
+ recent_avg = np.mean(recent_sentiments)
190
+ prev_avg = np.mean(prev_sentiments)
191
+
192
+ trend = recent_avg - prev_avg
193
+ # Scale trend influence
194
+ trend_multiplier = np.tanh(trend * 5) * self.sentiment_influence * 0.3
195
+ return float(trend_multiplier)
196
+
197
+ except Exception as e:
198
+ logger.warning(f"Error calculating trend multiplier: {e}")
199
+ return 0.0
200
+
201
+ def _enhance_observation(self, original_observation: np.ndarray) -> np.ndarray:
202
+ """Enhance observation with sentiment information"""
203
+ if not self.use_sentiment or original_observation is None:
204
+ return original_observation
205
 
206
+ try:
207
+ # For now, return original observation
208
+ # Future: could concatenate sentiment as additional channels or metadata
209
+ return original_observation.copy()
210
+ except Exception as e:
211
+ logger.warning(f"Error enhancing observation: {e}")
212
+ return original_observation
213
+
214
+ def _get_safe_observation(self) -> np.ndarray:
215
+ """Get a safe default observation"""
216
+ try:
217
+ # Try to get current observation from base env
218
+ if hasattr(self, 'current_observation'):
219
+ return self.current_observation.copy()
220
+ # Return zero observation of expected shape
221
+ return np.zeros((84, 84, 4), dtype=np.float32)
222
+ except:
223
+ return np.zeros((84, 84, 4), dtype=np.float32)
224
+
225
+ def get_sentiment_analysis(self) -> Dict[str, Any]:
226
+ """Get detailed sentiment analysis with safety checks"""
227
  if not self.use_sentiment:
228
+ return {"error": "Sentiment analysis disabled", "sentiment": 0.5, "confidence": 0.0}
229
+
230
+ try:
231
+ trend_direction = self._calculate_sentiment_trend_direction()
232
+ return {
233
+ "current_sentiment": float(self.current_sentiment),
234
+ "sentiment_confidence": float(self.sentiment_confidence),
235
+ "sentiment_trend": trend_direction,
236
+ "influence_level": float(self.sentiment_influence),
237
+ "history_length": len(self.sentiment_history),
238
+ "update_freq": self.sentiment_update_freq,
239
+ "last_update_step": self.current_step
240
+ }
241
+ except Exception as e:
242
+ logger.error(f"Error in get_sentiment_analysis: {e}")
243
+ return {
244
+ "error": str(e),
245
+ "sentiment": 0.5,
246
+ "confidence": 0.0,
247
+ "trend": "unknown"
248
+ }
249
+
250
+ def _calculate_sentiment_trend_direction(self) -> str:
251
  """Calculate sentiment trend direction"""
252
  if len(self.sentiment_history) < 5:
253
+ return "insufficient_data"
 
 
 
254
 
255
+ try:
256
+ recent_avg = np.mean([h['sentiment'] for h in list(self.sentiment_history)[-5:]])
257
+ prev_avg = np.mean([h['sentiment'] for h in list(self.sentiment_history)[-10:-5]]) if len(self.sentiment_history) >= 10 else recent_avg
258
+
259
+ diff = recent_avg - prev_avg
260
+ if diff > 0.05:
261
+ return "bullish"
262
+ elif diff < -0.05:
263
+ return "bearish"
264
+ else:
265
+ return "neutral"
266
+ except:
267
+ return "error"
268
+
269
+ def reset(self) -> np.ndarray:
270
+ """Reset environment with sentiment state"""
271
+ try:
272
+ observation = super().reset()
273
+ self.current_step = 0
274
+ self.sentiment_history.clear()
275
+ self.current_sentiment = 0.5
276
+ self.sentiment_confidence = 0.0
277
+ logger.info("Environment reset with sentiment state")
278
+ return observation
279
+ except Exception as e:
280
+ logger.error(f"Error in reset: {e}")
281
+ # Force safe reset
282
+ super().reset()
283
+ self.current_step = 0
284
+ self.sentiment_history.clear()
285
+ return np.zeros((84, 84, 4), dtype=np.float32)
286
 
287
+ @property
288
+ def action_space_size(self) -> int:
289
+ """Get action space size from base environment"""
290
+ try:
291
+ return super().action_space.n if hasattr(super(), 'action_space') else 4
292
+ except:
293
+ return 4 # Default assumption