OmidSakaki committed on
Commit
d9381d2
·
verified ·
1 Parent(s): cd19213

Update src/sentiment/twitter_analyzer.py

Browse files
Files changed (1) hide show
  1. src/sentiment/twitter_analyzer.py +371 -265
src/sentiment/twitter_analyzer.py CHANGED
@@ -1,18 +1,30 @@
1
  import torch
 
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
3
  from textblob import TextBlob
4
  from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
5
- import numpy as np
6
- from typing import Dict, List, Tuple
7
  import time
8
  from datetime import datetime, timedelta
9
  import re
 
 
 
 
 
 
 
 
10
 
11
  class AdvancedSentimentAnalyzer:
12
- def __init__(self):
13
  self.sentiment_models = {}
14
- self.vader_analyzer = SentimentIntensityAnalyzer()
15
- self.influencers = {
 
 
 
 
16
  'elonmusk': {'name': 'Elon Musk', 'weight': 0.9, 'sector': 'all'},
17
  'cz_binance': {'name': 'Changpeng Zhao', 'weight': 0.8, 'sector': 'crypto'},
18
  'saylor': {'name': 'Michael Saylor', 'weight': 0.7, 'sector': 'bitcoin'},
@@ -21,204 +33,309 @@ class AdvancedSentimentAnalyzer:
21
  'peterlbrandt': {'name': 'Peter Brandt', 'weight': 0.8, 'sector': 'trading'},
22
  'nic__carter': {'name': 'Nic Carter', 'weight': 0.7, 'sector': 'crypto'},
23
  'avalancheavax': {'name': 'Avalanche', 'weight': 0.6, 'sector': 'defi'}
24
- }
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
- def initialize_models(self):
27
- """Initialize all sentiment analysis models"""
 
 
 
 
 
 
 
 
28
  try:
29
- # Financial sentiment model
30
- self.sentiment_models['financial'] = pipeline(
31
- "sentiment-analysis",
32
- model="mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis",
33
- tokenizer="mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"
34
- )
35
-
36
- # General sentiment model
37
- self.sentiment_models['general'] = pipeline("sentiment-analysis")
38
-
39
- # Crypto-specific model
40
  try:
41
- self.sentiment_models['crypto'] = pipeline(
42
  "sentiment-analysis",
43
- model="ElKulako/cryptobert",
44
- tokenizer="ElKulako/cryptobert"
 
 
45
  )
46
- except:
47
- self.sentiment_models['crypto'] = self.sentiment_models['financial']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
- print("✅ All sentiment models loaded successfully!")
50
- return True
 
 
 
 
 
 
 
51
 
 
 
 
 
 
 
 
 
52
  except Exception as e:
53
- print(f"❌ Error loading models: {e}")
54
  return False
55
 
56
  def analyze_text_sentiment(self, text: str) -> Dict:
57
- """Comprehensive sentiment analysis using multiple models"""
58
- if not text or len(text.strip()) < 10:
59
  return self._default_sentiment()
60
 
 
 
 
 
61
  try:
62
- # Clean text
63
  cleaned_text = self._clean_text(text)
64
 
65
- # Analyze with multiple models
66
- financial_sentiment = self._analyze_financial(cleaned_text)
67
- general_sentiment = self._analyze_general(cleaned_text)
68
- crypto_sentiment = self._analyze_crypto(cleaned_text)
69
- vader_sentiment = self._analyze_vader(cleaned_text)
70
- textblob_sentiment = self._analyze_textblob(cleaned_text)
71
 
72
- # Combine results with weights
73
- sentiments = [
74
- (financial_sentiment['score'], 0.3),
75
- (general_sentiment['score'], 0.2),
76
- (crypto_sentiment['score'], 0.25),
77
- (vader_sentiment['compound'], 0.15),
78
- (textblob_sentiment['polarity'], 0.1)
79
- ]
80
 
81
- weighted_score = sum(score * weight for score, weight in sentiments)
82
- confidence = np.mean([
83
- financial_sentiment['confidence'],
84
- general_sentiment['confidence'],
85
- crypto_sentiment['confidence'],
86
- vader_sentiment['confidence'],
87
- textblob_sentiment['confidence']
88
- ])
89
 
90
- # Determine sentiment label
91
- if weighted_score > 0.6:
92
- sentiment_label = "bullish"
93
- elif weighted_score > 0.4:
94
- sentiment_label = "neutral"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  else:
96
- sentiment_label = "bearish"
 
97
 
98
- # Extract keywords and urgency
99
- keywords = self._extract_keywords(cleaned_text)
100
- urgency = self._detect_urgency(cleaned_text)
101
 
102
- return {
103
  "sentiment": sentiment_label,
104
- "score": float(weighted_score),
105
- "confidence": float(confidence),
106
- "urgency": urgency,
107
- "keywords": keywords,
108
- "models_used": len([s for s in sentiments if s[0] != 0.5]),
109
  "text_snippet": cleaned_text[:100] + "..." if len(cleaned_text) > 100 else cleaned_text
110
  }
111
 
 
 
 
 
 
 
 
112
  except Exception as e:
113
- print(f"Error in sentiment analysis: {e}")
114
  return self._default_sentiment()
115
 
116
- def _analyze_financial(self, text: str) -> Dict:
117
- """Analyze with financial sentiment model"""
118
  try:
119
- result = self.sentiment_models['financial'](text)[0]
120
- score_map = {"negative": 0.0, "neutral": 0.5, "positive": 1.0}
121
- return {
122
- 'score': score_map.get(result['label'].lower(), 0.5),
123
- 'confidence': result['score']
 
 
124
  }
125
- except:
126
- return {'score': 0.5, 'confidence': 0.0}
127
-
128
- def _analyze_general(self, text: str) -> Dict:
129
- """Analyze with general sentiment model"""
130
- try:
131
- result = self.sentiment_models['general'](text)[0]
132
- score_map = {"negative": 0.0, "neutral": 0.5, "positive": 1.0}
133
  return {
134
- 'score': score_map.get(result['label'].lower(), 0.5),
135
- 'confidence': result['score']
 
136
  }
137
- except:
138
- return {'score': 0.5, 'confidence': 0.0}
 
139
 
140
- def _analyze_crypto(self, text: str) -> Dict:
141
- """Analyze with crypto-specific model"""
142
- try:
143
- result = self.sentiment_models['crypto'](text)[0]
144
- score_map = {"negative": 0.0, "neutral": 0.5, "positive": 1.0}
145
- return {
146
- 'score': score_map.get(result['label'].lower(), 0.5),
147
- 'confidence': result['score']
148
- }
149
- except:
150
- return {'score': 0.5, 'confidence': 0.0}
151
 
152
  def _analyze_vader(self, text: str) -> Dict:
153
- """Analyze with VADER sentiment analyzer"""
 
 
 
154
  try:
155
  scores = self.vader_analyzer.polarity_scores(text)
 
156
  return {
157
- 'compound': (scores['compound'] + 1) / 2, # Convert to 0-1 scale
158
- 'confidence': abs(scores['compound'])
 
159
  }
160
- except:
161
- return {'compound': 0.5, 'confidence': 0.0}
162
 
163
  def _analyze_textblob(self, text: str) -> Dict:
164
- """Analyze with TextBlob"""
165
  try:
166
  analysis = TextBlob(text)
 
167
  return {
168
- 'polarity': (analysis.sentiment.polarity + 1) / 2, # Convert to 0-1 scale
169
- 'confidence': abs(analysis.sentiment.polarity)
 
170
  }
171
- except:
172
- return {'polarity': 0.5, 'confidence': 0.0}
173
 
174
  def _clean_text(self, text: str) -> str:
175
- """Clean and preprocess text"""
176
- # Remove URLs
177
- text = re.sub(r'http\S+', '', text)
178
- # Remove mentions and hashtags but keep the text
179
- text = re.sub(r'@\w+', '', text)
180
- text = re.sub(r'#', '', text)
181
- # Remove extra whitespace
182
- text = ' '.join(text.split())
183
- return text.strip()
 
 
 
 
184
 
185
  def _extract_keywords(self, text: str) -> List[str]:
186
- """Extract relevant financial keywords"""
187
- financial_keywords = {
188
- 'bullish': ['moon', 'rocket', 'bull', 'buy', 'long', 'growth', 'opportunity'],
189
- 'bearish': ['crash', 'bear', 'sell', 'short', 'drop', 'warning', 'risk'],
190
- 'crypto': ['bitcoin', 'btc', 'ethereum', 'eth', 'crypto', 'blockchain', 'defi'],
191
- 'urgency': ['now', 'urgent', 'immediately', 'alert', 'breaking']
192
  }
193
 
194
- found_keywords = []
195
  text_lower = text.lower()
196
 
197
- for category, keywords in financial_keywords.items():
198
  for keyword in keywords:
199
- if keyword in text_lower:
200
- found_keywords.append(f"{category}:{keyword}")
201
 
202
- return found_keywords[:5] # Return top 5 keywords
203
 
204
  def _detect_urgency(self, text: str) -> float:
205
- """Detect urgency level in text"""
206
- urgency_indicators = ['!', 'urgent', 'breaking', 'alert', 'immediately', 'now']
207
  text_lower = text.lower()
208
 
209
- urgency_score = 0.0
210
  for indicator in urgency_indicators:
211
- if indicator in text_lower:
212
- urgency_score += 0.2
 
 
 
 
213
 
214
- # Count exclamation marks
215
- exclamation_count = text.count('!')
216
- urgency_score += min(exclamation_count * 0.1, 0.3)
217
 
218
- return min(urgency_score, 1.0)
219
 
220
  def _default_sentiment(self) -> Dict:
221
- """Return default sentiment when analysis fails"""
222
  return {
223
  "sentiment": "neutral",
224
  "score": 0.5,
@@ -230,160 +347,149 @@ class AdvancedSentimentAnalyzer:
230
  }
231
 
232
  def get_influencer_sentiment(self, hours_back: int = 24) -> Dict:
233
- """Get sentiment analysis from multiple influencers"""
234
- all_tweets = self._generate_synthetic_tweets(hours_back)
235
- influencer_sentiments = {}
236
-
237
- for username, tweet_batch in all_tweets.items():
238
- tweet_sentiments = []
239
- for tweet in tweet_batch:
240
- sentiment = self.analyze_text_sentiment(tweet['text'])
241
- sentiment['timestamp'] = tweet['timestamp']
242
- sentiment['username'] = username
243
- tweet_sentiments.append(sentiment)
244
 
245
- if tweet_sentiments:
246
- avg_score = np.mean([t['score'] for t in tweet_sentiments])
247
- avg_confidence = np.mean([t['confidence'] for t in tweet_sentiments])
248
- influencer_sentiments[username] = {
249
- 'score': avg_score,
250
- 'confidence': avg_confidence,
251
- 'weight': self.influencers[username]['weight'],
252
- 'tweet_count': len(tweet_sentiments),
253
- 'recent_tweets': tweet_sentiments[:2] # Last 2 tweets
254
- }
255
-
256
- # Calculate weighted market sentiment
257
- if influencer_sentiments:
258
- total_weighted_score = 0
259
- total_weight = 0
 
 
 
 
 
 
 
 
 
 
 
 
 
260
 
261
- for username, data in influencer_sentiments.items():
262
- total_weighted_score += data['score'] * data['weight']
263
- total_weight += data['weight']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
 
265
- market_sentiment = total_weighted_score / total_weight if total_weight > 0 else 0.5
266
- else:
267
- market_sentiment = 0.5
268
-
269
- return {
270
- "market_sentiment": market_sentiment,
271
- "confidence": np.mean([d['confidence'] for d in influencer_sentiments.values()]) if influencer_sentiments else 0.0,
272
- "influencer_count": len(influencer_sentiments),
273
- "total_tweets": sum(d['tweet_count'] for d in influencer_sentiments.values()),
274
- "breakdown": influencer_sentiments,
275
- "timestamp": datetime.now().isoformat()
276
- }
 
 
 
 
 
277
 
278
  def _generate_synthetic_tweets(self, hours_back: int) -> Dict:
279
- """Generate realistic synthetic tweets based on market simulation"""
280
  current_time = time.time()
281
  tweets = {}
 
282
 
283
- # Market condition simulation
284
- market_trend = np.sin(current_time / 3600) * 0.3 + 0.5 # Oscillating trend
285
 
286
- for username, info in self.influencers.items():
287
  user_tweets = []
288
- base_sentiment = market_trend + np.random.normal(0, 0.1)
289
- base_sentiment = max(0.1, min(0.9, base_sentiment))
290
 
291
- tweet_templates = self._get_user_templates(username, base_sentiment)
292
 
293
- for i in range(np.random.randint(2, 6)): # 2-5 tweets per user
294
- template = np.random.choice(tweet_templates)
295
- tweet_text = template['text']
296
 
297
- # Add some randomness
298
- if np.random.random() < 0.3:
299
- tweet_text += " " + np.random.choice(["🚀", "📈", "📉", "💎", "🔥"])
 
300
 
301
  user_tweets.append({
302
  'text': tweet_text,
303
- 'timestamp': current_time - (i * 3600 * np.random.uniform(1, 4))
304
  })
305
 
306
  tweets[username] = user_tweets
307
 
308
  return tweets
309
 
310
- def _get_user_templates(self, username: str, base_sentiment: float) -> List[Dict]:
311
- """Get tweet templates based on user personality and sentiment"""
312
- bullish_templates = {
313
- 'elonmusk': [
314
- "The future is bright for digital assets! 🚀",
315
- "Adoption is accelerating faster than expected 📈",
316
- "Just added more to my position 💪",
317
- "Technology is evolving at an incredible pace 🌟"
318
- ],
319
- 'cz_binance': [
320
- "Strong fundamentals in the crypto space 📊",
321
- "Building for the next billion users 🏗️",
322
- "Innovation continues across the ecosystem 🔄",
323
- "Positive regulatory developments emerging ⚖️"
324
- ],
325
- 'saylor': [
326
- "Bitcoin represents digital excellence 💎",
327
- "The macroeconomic picture supports growth 📈",
328
- "Institutional adoption is accelerating 🏦",
329
- "Technology is the future of finance 🔮"
330
- ]
331
- }
332
-
333
- bearish_templates = {
334
- 'elonmusk': [
335
- "Market conditions looking challenging 🌧️",
336
- "Need to see more adoption for sustained growth 📉",
337
- "Regulatory concerns are weighing on sentiment ⚖️",
338
- "Volatility is higher than expected 📊"
339
  ],
340
- 'cz_binance': [
341
- "Market experiencing normal corrections 📉",
342
- "Important to manage risk in current environment 🛡️",
343
- "Short-term volatility doesn't change long-term thesis 🔄",
344
- "Focus on fundamentals over price action 📊"
345
  ],
346
- 'saylor': [
347
- "Short-term price action doesn't matter for long-term holders 💎",
348
- "Focus on the technology, not the noise 🔇",
349
- "Market cycles are normal and expected 🔄",
350
- "Education is key during volatile periods 📚"
351
  ]
352
  }
353
 
354
- neutral_templates = {
355
- 'elonmusk': [
356
- "Interesting developments in the space 🤔",
357
- "Keeping an eye on market movements 👀",
358
- "Technology continues to evolve 🔧",
359
- "The journey continues 🛣️"
360
- ],
361
- 'cz_binance': [
362
- "Monitoring market conditions 📊",
363
- "Continuing to build through all markets 🏗️",
364
- "Focus on long-term development 🎯",
365
- "Ecosystem growth continues 🌱"
366
- ],
367
- 'saylor': [
368
- "Bitcoin education is important 📖",
369
- "Understanding the technology is key 🔑",
370
- "Market cycles are part of growth 🔄",
371
- "Focus on the fundamentals 📊"
372
- ]
373
- }
374
 
375
- # Default templates for unknown users
376
- default_templates = {
377
- 'bullish': ["Market looking good!", "Positive developments ahead", "Growth continues"],
378
- 'bearish': ["Market challenges ahead", "Caution advised", "Volatility expected"],
379
- 'neutral': ["Monitoring developments", "Interesting times", "Continuing to watch"]
380
  }
381
-
382
- if base_sentiment > 0.6:
383
- templates = bullish_templates.get(username, default_templates['bullish'])
384
- elif base_sentiment < 0.4:
385
- templates = bearish_templates.get(username, default_templates['bearish'])
 
 
386
  else:
387
- templates = neutral_templates.get(username, default_templates['neutral'])
388
-
389
- return [{'text': template} for template in templates]
 
1
  import torch
2
+ import numpy as np
3
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
4
  from textblob import TextBlob
5
  from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
6
+ from typing import Dict, List, Tuple, Optional
 
7
  import time
8
  from datetime import datetime, timedelta
9
  import re
10
+ import logging
11
+ from functools import lru_cache
12
+ import warnings
13
+ warnings.filterwarnings('ignore')
14
+
15
+ # Setup logging
16
+ logging.basicConfig(level=logging.INFO)
17
+ logger = logging.getLogger(__name__)
18
 
19
  class AdvancedSentimentAnalyzer:
20
+ def __init__(self, max_model_retries=3, cache_size=100):
21
  self.sentiment_models = {}
22
+ self.vader_analyzer = None
23
+ self.max_model_retries = max_model_retries
24
+ self.cache = {} # Simple cache for expensive operations
25
+
26
+ # Influencers with validation
27
+ self.influencers = self._validate_influencers({
28
  'elonmusk': {'name': 'Elon Musk', 'weight': 0.9, 'sector': 'all'},
29
  'cz_binance': {'name': 'Changpeng Zhao', 'weight': 0.8, 'sector': 'crypto'},
30
  'saylor': {'name': 'Michael Saylor', 'weight': 0.7, 'sector': 'bitcoin'},
 
33
  'peterlbrandt': {'name': 'Peter Brandt', 'weight': 0.8, 'sector': 'trading'},
34
  'nic__carter': {'name': 'Nic Carter', 'weight': 0.7, 'sector': 'crypto'},
35
  'avalancheavax': {'name': 'Avalanche', 'weight': 0.6, 'sector': 'defi'}
36
+ })
37
+
38
+ self._initialize_vader()
39
+
40
+ def _validate_influencers(self, influencers: Dict) -> Dict:
41
+ """Validate and normalize influencer weights"""
42
+ validated = {}
43
+ total_weight = 0
44
+
45
+ for username, data in influencers.items():
46
+ if 0.0 <= data.get('weight', 0) <= 1.0:
47
+ validated[username] = data
48
+ total_weight += data['weight']
49
 
50
+ # Normalize weights to sum to 1
51
+ if total_weight > 0:
52
+ for username in validated:
53
+ validated[username]['weight'] /= total_weight
54
+
55
+ logger.info(f"Validated {len(validated)} influencers with total weight {total_weight:.2f}")
56
+ return validated
57
+
58
+ def _initialize_vader(self):
59
+ """Initialize VADER safely"""
60
  try:
61
+ self.vader_analyzer = SentimentIntensityAnalyzer()
62
+ logger.info("VADER analyzer initialized")
63
+ except Exception as e:
64
+ logger.warning(f"Failed to initialize VADER: {e}")
65
+ self.vader_analyzer = None
66
+
67
+ @lru_cache(maxsize=128)
68
+ def _safe_pipeline_load(self, model_name: str):
69
+ """Safely load pipeline with caching and retries"""
70
+ for attempt in range(self.max_model_retries):
 
71
  try:
72
+ pipeline_obj = pipeline(
73
  "sentiment-analysis",
74
+ model=model_name,
75
+ tokenizer=model_name,
76
+ device=-1, # CPU only for stability
77
+ return_all_scores=False
78
  )
79
+ logger.info(f"Successfully loaded model: {model_name}")
80
+ return pipeline_obj
81
+ except Exception as e:
82
+ logger.warning(f"Attempt {attempt + 1} failed for {model_name}: {e}")
83
+ if attempt == self.max_model_retries - 1:
84
+ return None
85
+ time.sleep(1) # Brief delay before retry
86
+
87
def initialize_models(self) -> bool:
    """Load the financial, general and crypto sentiment pipelines.

    Each model is loaded through ``self._safe_pipeline_load``. If the named
    general model cannot be loaded, the library's default
    "sentiment-analysis" pipeline is tried. If the crypto model cannot be
    loaded, ``'crypto'`` is aliased to the financial model (or, failing
    that, the general model); when neither exists, no ``'crypto'`` entry is
    stored at all.

    Returns:
        ``True`` if at least one model was loaded, ``False`` otherwise
        (including when an unexpected exception is raised).
    """
    success_count = 0

    try:
        # Financial sentiment model
        financial_model = self._safe_pipeline_load(
            "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"
        )
        if financial_model:
            self.sentiment_models['financial'] = financial_model
            success_count += 1

        # General sentiment model with fallback to the default pipeline
        general_model = self._safe_pipeline_load("distilbert-base-uncased-finetuned-sst-2-english")
        if not general_model:
            try:
                general_model = pipeline("sentiment-analysis")
            except Exception as e:
                logger.warning(f"Default sentiment pipeline unavailable: {e}")
                general_model = None
        if general_model:
            self.sentiment_models['general'] = general_model
            success_count += 1

        # Crypto-specific model with fallback
        crypto_model = self._safe_pipeline_load("ElKulako/cryptobert")
        if crypto_model:
            self.sentiment_models['crypto'] = crypto_model
            success_count += 1
        else:
            fallback = self.sentiment_models.get(
                'financial', self.sentiment_models.get('general')
            )
            # Only alias to a real model; never store None under 'crypto'.
            # The alias is not counted: it is not an additional loaded model.
            if fallback is not None:
                self.sentiment_models['crypto'] = fallback

        # At least one model should be available
        if success_count > 0:
            logger.info(f"✅ Loaded {success_count} sentiment models successfully!")
            return True

        logger.error("❌ No sentiment models could be loaded")
        return False

    except Exception as e:
        logger.error(f"❌ Critical error loading models: {e}")
        return False
134
 
135
  def analyze_text_sentiment(self, text: str) -> Dict:
136
+ """Comprehensive sentiment analysis with robust error handling"""
137
+ if not text or len(text.strip()) < 5:
138
  return self._default_sentiment()
139
 
140
+ cache_key = hash(text.strip()[:100]) # Simple cache key
141
+ if cache_key in self.cache:
142
+ return self.cache[cache_key]
143
+
144
  try:
 
145
  cleaned_text = self._clean_text(text)
146
 
147
+ # Analyze with available models
148
+ model_results = []
 
 
 
 
149
 
150
+ # Financial model
151
+ if 'financial' in self.sentiment_models:
152
+ model_results.append(self._analyze_model(cleaned_text, 'financial'))
 
 
 
 
 
153
 
154
+ # General model
155
+ if 'general' in self.sentiment_models:
156
+ model_results.append(self._analyze_model(cleaned_text, 'general'))
 
 
 
 
 
157
 
158
+ # Crypto model
159
+ if 'crypto' in self.sentiment_models:
160
+ model_results.append(self._analyze_model(cleaned_text, 'crypto'))
161
+
162
+ # Rule-based models
163
+ if self.vader_analyzer:
164
+ model_results.append(self._analyze_vader(cleaned_text))
165
+
166
+ model_results.append(self._analyze_textblob(cleaned_text))
167
+
168
+ # Filter valid results
169
+ valid_results = [r for r in model_results if r['score'] is not None]
170
+
171
+ if not valid_results:
172
+ return self._default_sentiment()
173
+
174
+ # Weighted combination (prioritize financial/crypto models)
175
+ weights = {
176
+ 'financial': 0.35, 'crypto': 0.30, 'general': 0.20,
177
+ 'vader': 0.10, 'textblob': 0.05
178
+ }
179
+
180
+ weighted_score = 0.0
181
+ total_weight = 0.0
182
+ confidences = []
183
+
184
+ for result in valid_results:
185
+ model_type = result.get('model_type', 'unknown')
186
+ weight = weights.get(model_type, 0.1)
187
+ weighted_score += result['score'] * weight
188
+ total_weight += weight
189
+ if 'confidence' in result:
190
+ confidences.append(result['confidence'])
191
+
192
+ if total_weight > 0:
193
+ final_score = weighted_score / total_weight
194
+ final_confidence = np.mean(confidences) if confidences else 0.0
195
  else:
196
+ final_score = 0.5
197
+ final_confidence = 0.0
198
 
199
+ # Determine sentiment label
200
+ sentiment_label = self._score_to_label(final_score)
 
201
 
202
+ result = {
203
  "sentiment": sentiment_label,
204
+ "score": float(final_score),
205
+ "confidence": float(final_confidence),
206
+ "urgency": self._detect_urgency(cleaned_text),
207
+ "keywords": self._extract_keywords(cleaned_text),
208
+ "models_used": len(valid_results),
209
  "text_snippet": cleaned_text[:100] + "..." if len(cleaned_text) > 100 else cleaned_text
210
  }
211
 
212
+ # Cache result
213
+ self.cache[cache_key] = result
214
+ if len(self.cache) > 50: # Limit cache size
215
+ self.cache.pop(next(iter(self.cache)))
216
+
217
+ return result
218
+
219
  except Exception as e:
220
+ logger.error(f"Error in sentiment analysis: {e}")
221
  return self._default_sentiment()
222
 
223
+ def _analyze_model(self, text: str, model_type: str) -> Dict:
224
+ """Generic model analysis with error handling"""
225
  try:
226
+ model = self.sentiment_models[model_type]
227
+ result = model(text[:512], truncation=True, max_length=512)[0] # Limit text length
228
+
229
+ score_map = {
230
+ 'negative': 0.0, 'NEGATIVE': 0.0,
231
+ 'neutral': 0.5, 'NEUTRAL': 0.5,
232
+ 'positive': 1.0, 'POSITIVE': 1.0
233
  }
234
+
235
+ score = score_map.get(result['label'].upper(), 0.5)
 
 
 
 
 
 
236
  return {
237
+ 'score': score,
238
+ 'confidence': result['score'],
239
+ 'model_type': model_type
240
  }
241
+ except Exception as e:
242
+ logger.debug(f"Model {model_type} failed: {e}")
243
+ return {'score': None, 'confidence': 0.0, 'model_type': model_type}
244
 
245
+ def _score_to_label(self, score: float) -> str:
246
+ """Convert score to sentiment label"""
247
+ if score > 0.6:
248
+ return "bullish"
249
+ elif score > 0.4:
250
+ return "neutral"
251
+ else:
252
+ return "bearish"
 
 
 
253
 
254
  def _analyze_vader(self, text: str) -> Dict:
255
+ """VADER analysis with error handling"""
256
+ if not self.vader_analyzer:
257
+ return {'score': None, 'confidence': 0.0, 'model_type': 'vader'}
258
+
259
  try:
260
  scores = self.vader_analyzer.polarity_scores(text)
261
+ compound = (scores['compound'] + 1) / 2 # Normalize to 0-1
262
  return {
263
+ 'score': compound,
264
+ 'confidence': abs(scores['compound']),
265
+ 'model_type': 'vader'
266
  }
267
+ except Exception:
268
+ return {'score': None, 'confidence': 0.0, 'model_type': 'vader'}
269
 
270
  def _analyze_textblob(self, text: str) -> Dict:
271
+ """TextBlob analysis with error handling"""
272
  try:
273
  analysis = TextBlob(text)
274
+ polarity = (analysis.sentiment.polarity + 1) / 2 # Normalize to 0-1
275
  return {
276
+ 'score': polarity,
277
+ 'confidence': abs(analysis.sentiment.polarity),
278
+ 'model_type': 'textblob'
279
  }
280
+ except Exception:
281
+ return {'score': None, 'confidence': 0.0, 'model_type': 'textblob'}
282
 
283
  def _clean_text(self, text: str) -> str:
284
+ """Enhanced text cleaning"""
285
+ try:
286
+ # Remove URLs
287
+ text = re.sub(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', '', text)
288
+ # Remove mentions
289
+ text = re.sub(r'@\w+', '', text)
290
+ # Remove hashtags but keep text
291
+ text = re.sub(r'#\w+', '', text)
292
+ # Remove extra whitespace and normalize
293
+ text = ' '.join(text.split())
294
+ return text.strip()
295
+ except:
296
+ return text[:200] if len(text) > 200 else text
297
 
298
  def _extract_keywords(self, text: str) -> List[str]:
299
+ """Extract financial keywords with better matching"""
300
+ keyword_categories = {
301
+ 'bullish': ['moon', 'rocket', 'bull', 'buy', 'long', 'growth', 'opportunity', 'bullrun'],
302
+ 'bearish': ['crash', 'bear', 'sell', 'short', 'drop', 'dump', 'warning', 'risk', 'fud'],
303
+ 'crypto': ['bitcoin', 'btc', 'ethereum', 'eth', 'crypto', 'blockchain', 'defi', 'nft'],
304
+ 'urgency': ['now', 'urgent', 'immediately', 'alert', 'breaking', 'huge']
305
  }
306
 
307
+ found = []
308
  text_lower = text.lower()
309
 
310
+ for category, keywords in keyword_categories.items():
311
  for keyword in keywords:
312
+ if re.search(rf'\b{keyword}\b', text_lower):
313
+ found.append(f"{category}:{keyword}")
314
 
315
+ return found[:5]
316
 
317
  def _detect_urgency(self, text: str) -> float:
318
+ """Improved urgency detection"""
319
+ urgency_indicators = ['!', 'urgent', 'breaking', 'alert', 'immediately', 'now', 'huge', 'massive']
320
  text_lower = text.lower()
321
 
322
+ score = 0.0
323
  for indicator in urgency_indicators:
324
+ if re.search(rf'\b{indicator}\b', text_lower):
325
+ score += 0.15
326
+
327
+ # Exclamation and question marks
328
+ punctuation_count = text.count('!') + text.count('?')
329
+ score += min(punctuation_count * 0.1, 0.3)
330
 
331
+ # Caps lock indicator
332
+ caps_ratio = sum(1 for c in text if c.isupper()) / len([c for c in text if c.isalpha()])
333
+ score += min(caps_ratio * 0.5, 0.2)
334
 
335
+ return min(score, 1.0)
336
 
337
  def _default_sentiment(self) -> Dict:
338
+ """Safe default sentiment"""
339
  return {
340
  "sentiment": "neutral",
341
  "score": 0.5,
 
347
  }
348
 
349
  def get_influencer_sentiment(self, hours_back: int = 24) -> Dict:
350
+ """Get weighted influencer sentiment with caching"""
351
+ try:
352
+ # Generate synthetic tweets (in production, replace with real API)
353
+ tweets = self._generate_synthetic_tweets(hours_back)
354
+ influencer_sentiments = {}
 
 
 
 
 
 
355
 
356
+ for username, tweet_batch in tweets.items():
357
+ if username not in self.influencers:
358
+ continue
359
+
360
+ tweet_sentiments = []
361
+ for tweet in tweet_batch:
362
+ sentiment = self.analyze_text_sentiment(tweet['text'])
363
+ sentiment.update({
364
+ 'timestamp': tweet['timestamp'],
365
+ 'username': username
366
+ })
367
+ tweet_sentiments.append(sentiment)
368
+
369
+ if tweet_sentiments:
370
+ # Weighted average by confidence
371
+ total_weighted = sum(s['score'] * s['confidence'] for s in tweet_sentiments)
372
+ total_confidence = sum(s['confidence'] for s in tweet_sentiments)
373
+
374
+ avg_score = total_weighted / total_confidence if total_confidence > 0 else 0.5
375
+ avg_confidence = np.mean([s['confidence'] for s in tweet_sentiments])
376
+
377
+ influencer_sentiments[username] = {
378
+ 'score': float(avg_score),
379
+ 'confidence': float(avg_confidence),
380
+ 'weight': self.influencers[username]['weight'],
381
+ 'tweet_count': len(tweet_sentiments),
382
+ 'tweets': tweet_sentiments[:3]
383
+ }
384
 
385
+ # Calculate market sentiment
386
+ if influencer_sentiments:
387
+ total_weighted_score = sum(
388
+ data['score'] * data['weight'] * data['confidence']
389
+ for data in influencer_sentiments.values()
390
+ )
391
+ total_weight = sum(
392
+ data['weight'] * data['confidence']
393
+ for data in influencer_sentiments.values()
394
+ )
395
+
396
+ market_sentiment = (total_weighted_score / total_weight
397
+ if total_weight > 0 else 0.5)
398
+ avg_confidence = np.mean([d['confidence'] for d in influencer_sentiments.values()])
399
+ else:
400
+ market_sentiment = 0.5
401
+ avg_confidence = 0.0
402
 
403
+ return {
404
+ "market_sentiment": float(market_sentiment),
405
+ "confidence": float(avg_confidence),
406
+ "influencer_count": len(influencer_sentiments),
407
+ "total_tweets": sum(d['tweet_count'] for d in influencer_sentiments.values()),
408
+ "timestamp": datetime.now().isoformat(),
409
+ "influencers": influencer_sentiments
410
+ }
411
+
412
+ except Exception as e:
413
+ logger.error(f"Error in get_influencer_sentiment: {e}")
414
+ return {
415
+ "market_sentiment": 0.5,
416
+ "confidence": 0.0,
417
+ "error": str(e),
418
+ "timestamp": datetime.now().isoformat()
419
+ }
420
 
421
  def _generate_synthetic_tweets(self, hours_back: int) -> Dict:
422
+ """Generate realistic synthetic tweets for testing"""
423
  current_time = time.time()
424
  tweets = {}
425
+ np.random.seed(int(current_time) % 10000) # Reproducible randomness
426
 
427
+ # Simulate market conditions
428
+ market_trend = np.sin(current_time / 3600) * 0.3 + 0.5
429
 
430
+ for username in self.influencers:
431
  user_tweets = []
432
+ base_sentiment = np.clip(market_trend + np.random.normal(0, 0.15), 0.1, 0.9)
 
433
 
434
+ templates = self._get_user_templates(username, base_sentiment)
435
 
436
+ for i in range(np.random.randint(1, 4)): # 1-3 tweets
437
+ template = np.random.choice(templates)
438
+ tweet_text = template.format(**self._get_template_vars(base_sentiment))
439
 
440
+ # Add emojis occasionally
441
+ if np.random.random() < 0.4:
442
+ emojis = self._get_relevant_emojis(base_sentiment)
443
+ tweet_text += " " + np.random.choice(emojis)
444
 
445
  user_tweets.append({
446
  'text': tweet_text,
447
+ 'timestamp': current_time - (i * 3600 * np.random.uniform(0.5, hours_back))
448
  })
449
 
450
  tweets[username] = user_tweets
451
 
452
  return tweets
453
 
454
+ def _get_user_templates(self, username: str, sentiment: float) -> List[str]:
455
+ """Get appropriate templates based on sentiment"""
456
+ templates = {
457
+ 'bullish': [
458
+ "{action} looking strong! {emoji}",
459
+ "Great {topic} developments ahead 🚀",
460
+ "Bullish on {topic} {emoji}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
461
  ],
462
+ 'bearish': [
463
+ "Caution on {topic} {emoji}",
464
+ "{action} facing challenges 📉",
465
+ "Bearish signals for {topic}"
 
466
  ],
467
+ 'neutral': [
468
+ "Watching {topic} developments 👀",
469
+ "{action} market update 📊",
470
+ "Interesting {topic} news"
 
471
  ]
472
  }
473
 
474
+ category = 'bullish' if sentiment > 0.6 else 'bearish' if sentiment < 0.4 else 'neutral'
475
+ return templates[category]
476
+
477
+ def _get_template_vars(self, sentiment: float) -> Dict:
478
+ """Get variables for tweet templates"""
479
+ topics = ['BTC', 'crypto', 'market', 'DeFi']
480
+ actions = ['Bitcoin', 'ETH', 'market', 'altcoins']
 
 
 
 
 
 
 
 
 
 
 
 
 
481
 
482
+ return {
483
+ 'topic': np.random.choice(topics),
484
+ 'action': np.random.choice(actions),
485
+ 'emoji': np.random.choice(['📈', '📉', '🚀', '💎'])
 
486
  }
487
+
488
+ def _get_relevant_emojis(self, sentiment: float) -> List[str]:
489
+ """Get sentiment-relevant emojis"""
490
+ if sentiment > 0.6:
491
+ return ['🚀', '📈', '💎', '🔥']
492
+ elif sentiment < 0.4:
493
+ return ['📉', '😬', '⚠️', '💥']
494
  else:
495
+ return ['📊', '👀', '🤔', '💭']