Update src/sentiment/twitter_analyzer.py

src/sentiment/twitter_analyzer.py (+371 −265) CHANGED

import torch
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from typing import Dict, List, Tuple, Optional
import time
from datetime import datetime, timedelta
import re
import logging
from functools import lru_cache
import warnings
warnings.filterwarnings('ignore')

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class AdvancedSentimentAnalyzer:
    def __init__(self, max_model_retries=3, cache_size=100):
        self.sentiment_models = {}
        self.vader_analyzer = None
        self.max_model_retries = max_model_retries
        self.cache_size = cache_size
        self.cache = {}  # Simple cache for expensive operations

        # Influencers with validation
        self.influencers = self._validate_influencers({
            'elonmusk': {'name': 'Elon Musk', 'weight': 0.9, 'sector': 'all'},
            'cz_binance': {'name': 'Changpeng Zhao', 'weight': 0.8, 'sector': 'crypto'},
            'saylor': {'name': 'Michael Saylor', 'weight': 0.7, 'sector': 'bitcoin'},
            # … (two influencer entries collapsed in the diff view)
            'peterlbrandt': {'name': 'Peter Brandt', 'weight': 0.8, 'sector': 'trading'},
            'nic__carter': {'name': 'Nic Carter', 'weight': 0.7, 'sector': 'crypto'},
            'avalancheavax': {'name': 'Avalanche', 'weight': 0.6, 'sector': 'defi'}
        })

        self._initialize_vader()

    def _validate_influencers(self, influencers: Dict) -> Dict:
        """Validate and normalize influencer weights"""
        validated = {}
        total_weight = 0

        for username, data in influencers.items():
            if 0.0 <= data.get('weight', 0) <= 1.0:
                validated[username] = data
                total_weight += data['weight']

        # Normalize weights to sum to 1
        if total_weight > 0:
            for username in validated:
                validated[username]['weight'] /= total_weight

        logger.info(f"Validated {len(validated)} influencers with total weight {total_weight:.2f}")
        return validated

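    # Illustrative arithmetic for _validate_influencers (not in the diff):
    # the six visible weights above (0.9 + 0.8 + 0.7 + 0.8 + 0.7 + 0.6)
    # total 4.5, so after normalization 'elonmusk' would carry
    # 0.9 / 4.5 = 0.2 of the overall influence; the two collapsed entries
    # would shift these numbers.
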
    def _initialize_vader(self):
        """Initialize VADER safely"""
        try:
            self.vader_analyzer = SentimentIntensityAnalyzer()
            logger.info("VADER analyzer initialized")
        except Exception as e:
            logger.warning(f"Failed to initialize VADER: {e}")
            self.vader_analyzer = None

    @lru_cache(maxsize=128)
    def _safe_pipeline_load(self, model_name: str):
        """Safely load pipeline with caching and retries"""
        for attempt in range(self.max_model_retries):
            try:
                pipeline_obj = pipeline(
                    "sentiment-analysis",
                    model=model_name,
                    tokenizer=model_name,
                    device=-1,  # CPU only for stability
                    return_all_scores=False
                )
                logger.info(f"Successfully loaded model: {model_name}")
                return pipeline_obj
            except Exception as e:
                logger.warning(f"Attempt {attempt + 1} failed for {model_name}: {e}")
                if attempt == self.max_model_retries - 1:
                    return None
                time.sleep(1)  # Brief delay before retry

    def initialize_models(self) -> bool:
        """Initialize all sentiment analysis models with fallback"""
        success_count = 0

        try:
            # Financial sentiment model
            financial_model = self._safe_pipeline_load(
                "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"
            )
            if financial_model:
                self.sentiment_models['financial'] = financial_model
                success_count += 1

            # General sentiment model with fallback
            general_model = self._safe_pipeline_load("distilbert-base-uncased-finetuned-sst-2-english")
            if general_model:
                self.sentiment_models['general'] = general_model
                success_count += 1
            else:
                # Fallback to the default pipeline model
                try:
                    self.sentiment_models['general'] = pipeline("sentiment-analysis")
                    success_count += 1
                except Exception:
                    pass

            # Crypto-specific model with fallback
            crypto_model = self._safe_pipeline_load("ElKulako/cryptobert")
            if crypto_model:
                self.sentiment_models['crypto'] = crypto_model
                success_count += 1
            else:
                self.sentiment_models['crypto'] = self.sentiment_models.get(
                    'financial', self.sentiment_models.get('general'))
                success_count += 1 if self.sentiment_models['crypto'] else 0

            # At least one model should be available
            if success_count > 0:
                logger.info(f"✅ Loaded {success_count} sentiment models successfully!")
                return True
            else:
                logger.error("❌ No sentiment models could be loaded")
                return False

        except Exception as e:
            logger.error(f"❌ Critical error loading models: {e}")
            return False

    def analyze_text_sentiment(self, text: str) -> Dict:
        """Comprehensive sentiment analysis with robust error handling"""
        if not text or len(text.strip()) < 5:
            return self._default_sentiment()

        cache_key = hash(text.strip()[:100])  # Simple cache key
        if cache_key in self.cache:
            return self.cache[cache_key]

        try:
            cleaned_text = self._clean_text(text)

            # Analyze with available models
            model_results = []

            # Financial model
            if 'financial' in self.sentiment_models:
                model_results.append(self._analyze_model(cleaned_text, 'financial'))

            # General model
            if 'general' in self.sentiment_models:
                model_results.append(self._analyze_model(cleaned_text, 'general'))

            # Crypto model
            if 'crypto' in self.sentiment_models:
                model_results.append(self._analyze_model(cleaned_text, 'crypto'))

            # Rule-based models
            if self.vader_analyzer:
                model_results.append(self._analyze_vader(cleaned_text))

            model_results.append(self._analyze_textblob(cleaned_text))

            # Filter valid results
            valid_results = [r for r in model_results if r['score'] is not None]

            if not valid_results:
                return self._default_sentiment()

            # Weighted combination (prioritize financial/crypto models)
            weights = {
                'financial': 0.35, 'crypto': 0.30, 'general': 0.20,
                'vader': 0.10, 'textblob': 0.05
            }

            weighted_score = 0.0
            total_weight = 0.0
            confidences = []

            for result in valid_results:
                model_type = result.get('model_type', 'unknown')
                weight = weights.get(model_type, 0.1)
                weighted_score += result['score'] * weight
                total_weight += weight
                if 'confidence' in result:
                    confidences.append(result['confidence'])

            if total_weight > 0:
                final_score = weighted_score / total_weight
                final_confidence = np.mean(confidences) if confidences else 0.0
            else:
                final_score = 0.5
                final_confidence = 0.0

            # Determine sentiment label
            sentiment_label = self._score_to_label(final_score)

            result = {
                "sentiment": sentiment_label,
                "score": float(final_score),
                "confidence": float(final_confidence),
                "urgency": self._detect_urgency(cleaned_text),
                "keywords": self._extract_keywords(cleaned_text),
                "models_used": len(valid_results),
                "text_snippet": cleaned_text[:100] + "..." if len(cleaned_text) > 100 else cleaned_text
            }

            # Cache result (evict the oldest entry once the cap is reached)
            self.cache[cache_key] = result
            if len(self.cache) > self.cache_size:
                self.cache.pop(next(iter(self.cache)))

            return result

        except Exception as e:
            logger.error(f"Error in sentiment analysis: {e}")
            return self._default_sentiment()

    def _analyze_model(self, text: str, model_type: str) -> Dict:
        """Generic model analysis with error handling"""
        try:
            model = self.sentiment_models[model_type]
            result = model(text[:512], truncation=True, max_length=512)[0]  # Limit text length

            score_map = {
                'negative': 0.0, 'NEGATIVE': 0.0,
                'neutral': 0.5, 'NEUTRAL': 0.5,
                'positive': 1.0, 'POSITIVE': 1.0
            }

            score = score_map.get(result['label'].upper(), 0.5)
            return {
                'score': score,
                'confidence': result['score'],
                'model_type': model_type
            }
        except Exception as e:
            logger.debug(f"Model {model_type} failed: {e}")
            return {'score': None, 'confidence': 0.0, 'model_type': model_type}

    def _score_to_label(self, score: float) -> str:
        """Convert score to sentiment label"""
        if score > 0.6:
            return "bullish"
        elif score > 0.4:
            return "neutral"
        else:
            return "bearish"

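    # Illustrative arithmetic for the ensemble above (numbers are examples
    # only): if just the financial model (score 0.8, weight 0.35) and VADER
    # (score 0.6, weight 0.10) return results, then
    #     final_score = (0.8 * 0.35 + 0.6 * 0.10) / (0.35 + 0.10) ≈ 0.756
    # which _score_to_label maps to "bullish" (> 0.6).
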
    def _analyze_vader(self, text: str) -> Dict:
        """VADER analysis with error handling"""
        if not self.vader_analyzer:
            return {'score': None, 'confidence': 0.0, 'model_type': 'vader'}

        try:
            scores = self.vader_analyzer.polarity_scores(text)
            compound = (scores['compound'] + 1) / 2  # Normalize to 0-1
            return {
                'score': compound,
                'confidence': abs(scores['compound']),
                'model_type': 'vader'
            }
        except Exception:
            return {'score': None, 'confidence': 0.0, 'model_type': 'vader'}

    def _analyze_textblob(self, text: str) -> Dict:
        """TextBlob analysis with error handling"""
        try:
            analysis = TextBlob(text)
            polarity = (analysis.sentiment.polarity + 1) / 2  # Normalize to 0-1
            return {
                'score': polarity,
                'confidence': abs(analysis.sentiment.polarity),
                'model_type': 'textblob'
            }
        except Exception:
            return {'score': None, 'confidence': 0.0, 'model_type': 'textblob'}

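    # Illustrative: a VADER compound of -0.5 maps to (-0.5 + 1) / 2 = 0.25
    # with confidence |-0.5| = 0.5, putting both rule-based analyzers on the
    # same 0-1 scale the transformer models use.
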
    def _clean_text(self, text: str) -> str:
        """Enhanced text cleaning"""
        try:
            # Remove URLs
            text = re.sub(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', '', text)
            # Remove mentions
            text = re.sub(r'@\w+', '', text)
            # Remove hashtag symbols but keep the tag text
            text = re.sub(r'#(\w+)', r'\1', text)
            # Remove extra whitespace and normalize
            text = ' '.join(text.split())
            return text.strip()
        except Exception:
            return text[:200] if len(text) > 200 else text

    def _extract_keywords(self, text: str) -> List[str]:
        """Extract financial keywords with better matching"""
        keyword_categories = {
            'bullish': ['moon', 'rocket', 'bull', 'buy', 'long', 'growth', 'opportunity', 'bullrun'],
            'bearish': ['crash', 'bear', 'sell', 'short', 'drop', 'dump', 'warning', 'risk', 'fud'],
            'crypto': ['bitcoin', 'btc', 'ethereum', 'eth', 'crypto', 'blockchain', 'defi', 'nft'],
            'urgency': ['now', 'urgent', 'immediately', 'alert', 'breaking', 'huge']
        }

        found = []
        text_lower = text.lower()

        for category, keywords in keyword_categories.items():
            for keyword in keywords:
                if re.search(rf'\b{keyword}\b', text_lower):
                    found.append(f"{category}:{keyword}")

        return found[:5]

    def _detect_urgency(self, text: str) -> float:
        """Improved urgency detection"""
        urgency_indicators = ['urgent', 'breaking', 'alert', 'immediately', 'now', 'huge', 'massive']
        text_lower = text.lower()

        score = 0.0
        for indicator in urgency_indicators:
            if re.search(rf'\b{indicator}\b', text_lower):
                score += 0.15

        # Exclamation and question marks
        punctuation_count = text.count('!') + text.count('?')
        score += min(punctuation_count * 0.1, 0.3)

        # Caps lock indicator (guard against text with no letters)
        alpha_count = sum(1 for c in text if c.isalpha())
        if alpha_count:
            caps_ratio = sum(1 for c in text if c.isupper()) / alpha_count
            score += min(caps_ratio * 0.5, 0.2)

        return min(score, 1.0)

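    # Illustrative: for "BREAKING: huge move now!!!" the indicators
    # 'breaking', 'huge' and 'now' add 3 * 0.15 = 0.45, three '!' add
    # min(0.3, 0.3) = 0.3, and the caps ratio 8/19 adds min(0.21, 0.2) = 0.2,
    # for an urgency of 0.95.
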
    def _default_sentiment(self) -> Dict:
        """Safe default sentiment"""
        return {
            "sentiment": "neutral",
            "score": 0.5,
            "confidence": 0.0,
            "urgency": 0.0,
            "keywords": [],
            "models_used": 0,
            "text_snippet": ""
        }

    def get_influencer_sentiment(self, hours_back: int = 24) -> Dict:
        """Get weighted influencer sentiment with caching"""
        try:
            # Generate synthetic tweets (in production, replace with real API)
            tweets = self._generate_synthetic_tweets(hours_back)
            influencer_sentiments = {}

            for username, tweet_batch in tweets.items():
                if username not in self.influencers:
                    continue

                tweet_sentiments = []
                for tweet in tweet_batch:
                    sentiment = self.analyze_text_sentiment(tweet['text'])
                    sentiment.update({
                        'timestamp': tweet['timestamp'],
                        'username': username
                    })
                    tweet_sentiments.append(sentiment)

                if tweet_sentiments:
                    # Weighted average by confidence
                    total_weighted = sum(s['score'] * s['confidence'] for s in tweet_sentiments)
                    total_confidence = sum(s['confidence'] for s in tweet_sentiments)

                    avg_score = total_weighted / total_confidence if total_confidence > 0 else 0.5
                    avg_confidence = np.mean([s['confidence'] for s in tweet_sentiments])

                    influencer_sentiments[username] = {
                        'score': float(avg_score),
                        'confidence': float(avg_confidence),
                        'weight': self.influencers[username]['weight'],
                        'tweet_count': len(tweet_sentiments),
                        'tweets': tweet_sentiments[:3]
                    }

            # Calculate market sentiment
            if influencer_sentiments:
                total_weighted_score = sum(
                    data['score'] * data['weight'] * data['confidence']
                    for data in influencer_sentiments.values()
                )
                total_weight = sum(
                    data['weight'] * data['confidence']
                    for data in influencer_sentiments.values()
                )

                market_sentiment = (total_weighted_score / total_weight
                                    if total_weight > 0 else 0.5)
                avg_confidence = np.mean([d['confidence'] for d in influencer_sentiments.values()])
            else:
                market_sentiment = 0.5
                avg_confidence = 0.0

            return {
                "market_sentiment": float(market_sentiment),
                "confidence": float(avg_confidence),
                "influencer_count": len(influencer_sentiments),
                "total_tweets": sum(d['tweet_count'] for d in influencer_sentiments.values()),
                "timestamp": datetime.now().isoformat(),
                "influencers": influencer_sentiments
            }

        except Exception as e:
            logger.error(f"Error in get_influencer_sentiment: {e}")
            return {
                "market_sentiment": 0.5,
                "confidence": 0.0,
                "error": str(e),
                "timestamp": datetime.now().isoformat()
            }

    def _generate_synthetic_tweets(self, hours_back: int) -> Dict:
        """Generate realistic synthetic tweets for testing"""
        current_time = time.time()
        tweets = {}
        np.random.seed(int(current_time) % 10000)  # Reproducible randomness

        # Simulate market conditions
        market_trend = np.sin(current_time / 3600) * 0.3 + 0.5

        for username in self.influencers:
            user_tweets = []
            base_sentiment = np.clip(market_trend + np.random.normal(0, 0.15), 0.1, 0.9)

            templates = self._get_user_templates(username, base_sentiment)

            for i in range(np.random.randint(1, 4)):  # 1-3 tweets
                template = np.random.choice(templates)
                tweet_text = template.format(**self._get_template_vars(base_sentiment))

                # Add emojis occasionally
                if np.random.random() < 0.4:
                    emojis = self._get_relevant_emojis(base_sentiment)
                    tweet_text += " " + np.random.choice(emojis)

                user_tweets.append({
                    'text': tweet_text,
                    'timestamp': current_time - (i * 3600 * np.random.uniform(0.5, hours_back))
                })

            tweets[username] = user_tweets

        return tweets

    def _get_user_templates(self, username: str, sentiment: float) -> List[str]:
        """Get appropriate templates based on sentiment"""
        templates = {
            'bullish': [
                "{action} looking strong! {emoji}",
                "Great {topic} developments ahead 🚀",
                "Bullish on {topic} {emoji}"
            ],
            'bearish': [
                "Caution on {topic} {emoji}",
                "{action} facing challenges 📉",
                "Bearish signals for {topic}"
            ],
            'neutral': [
                "Watching {topic} developments 👀",
                "{action} market update 📊",
                "Interesting {topic} news"
            ]
        }

        category = 'bullish' if sentiment > 0.6 else 'bearish' if sentiment < 0.4 else 'neutral'
        return templates[category]

    def _get_template_vars(self, sentiment: float) -> Dict:
        """Get variables for tweet templates"""
        topics = ['BTC', 'crypto', 'market', 'DeFi']
        actions = ['Bitcoin', 'ETH', 'market', 'altcoins']

        return {
            'topic': np.random.choice(topics),
            'action': np.random.choice(actions),
            'emoji': np.random.choice(['📈', '📉', '🚀', '💎'])
        }

    def _get_relevant_emojis(self, sentiment: float) -> List[str]:
        """Get sentiment-relevant emojis"""
        if sentiment > 0.6:
            return ['🚀', '📈', '💎', '🔥']
        elif sentiment < 0.4:
            return ['📉', '😬', '⚠️', '💥']
        else:
            return ['📊', '👀', '🤔', '💭']
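
# A minimal usage sketch, assuming the hosted models named above can be
# downloaded in this environment; if they cannot, initialize_models() returns
# False and analyze_text_sentiment() still scores text with the VADER and
# TextBlob fallbacks.
if __name__ == "__main__":
    analyzer = AdvancedSentimentAnalyzer()
    analyzer.initialize_models()
    print(analyzer.analyze_text_sentiment("Bitcoin breaking out, huge move now!"))
    print(analyzer.get_influencer_sentiment(hours_back=12))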