OmidSakaki committed on
Commit
f52806e
·
verified ·
1 Parent(s): c8f80bd

Create src/sentiment/twitter_analyzer.py

Browse files
Files changed (1) hide show
  1. src/sentiment/twitter_analyzer.py +389 -0
src/sentiment/twitter_analyzer.py ADDED
@@ -0,0 +1,389 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
3
+ from textblob import TextBlob
4
+ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
5
+ import numpy as np
6
+ from typing import Dict, List, Tuple
7
+ import time
8
+ from datetime import datetime, timedelta
9
+ import re
10
+
11
class AdvancedSentimentAnalyzer:
    """Ensemble sentiment analyzer for market-related short texts.

    Combines three Hugging Face pipelines (financial, general, crypto) with
    VADER and TextBlob into one weighted score on a 0..1 scale, where 0 is
    fully bearish and 1 is fully bullish.  It also aggregates per-influencer
    sentiment over synthetic (simulated, not fetched) tweets.
    """

    # Classifier label -> score on the 0 (bearish) .. 1 (bullish) scale.
    # The crypto model reports "Bullish"/"Bearish" instead of
    # "positive"/"negative", so both vocabularies are listed; anything
    # unknown falls back to the neutral 0.5.
    _LABEL_SCORES = {
        "negative": 0.0,
        "bearish": 0.0,
        "neutral": 0.5,
        "positive": 1.0,
        "bullish": 1.0,
    }

    def __init__(self):
        # Pipelines are loaded lazily by initialize_models(); until then the
        # model-based analyzers return a neutral, zero-confidence result.
        self.sentiment_models = {}
        self.vader_analyzer = SentimentIntensityAnalyzer()
        # Twitter handle -> display name, aggregation weight and sector tag.
        self.influencers = {
            'elonmusk': {'name': 'Elon Musk', 'weight': 0.9, 'sector': 'all'},
            'cz_binance': {'name': 'Changpeng Zhao', 'weight': 0.8, 'sector': 'crypto'},
            'saylor': {'name': 'Michael Saylor', 'weight': 0.7, 'sector': 'bitcoin'},
            'crypto_bitlord': {'name': 'Crypto Bitlord', 'weight': 0.6, 'sector': 'crypto'},
            'aantonop': {'name': 'Andreas Antonopoulos', 'weight': 0.7, 'sector': 'bitcoin'},
            'peterlbrandt': {'name': 'Peter Brandt', 'weight': 0.8, 'sector': 'trading'},
            'nic__carter': {'name': 'Nic Carter', 'weight': 0.7, 'sector': 'crypto'},
            'avalancheavax': {'name': 'Avalanche', 'weight': 0.6, 'sector': 'defi'}
        }

    def initialize_models(self):
        """Load all transformer pipelines.

        Returns:
            True when the financial and general models loaded (the crypto
            model falls back to the financial one if it cannot be loaded),
            False when loading failed.
        """
        try:
            # Financial sentiment model
            self.sentiment_models['financial'] = pipeline(
                "sentiment-analysis",
                model="mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis",
                tokenizer="mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"
            )

            # General sentiment model
            self.sentiment_models['general'] = pipeline("sentiment-analysis")

            # Crypto-specific model; optional, so a load failure is not fatal.
            try:
                self.sentiment_models['crypto'] = pipeline(
                    "sentiment-analysis",
                    model="ElKulako/cryptobert",
                    tokenizer="ElKulako/cryptobert"
                )
            except Exception:
                self.sentiment_models['crypto'] = self.sentiment_models['financial']

            print("✅ All sentiment models loaded successfully!")
            return True

        except Exception as e:
            print(f"❌ Error loading models: {e}")
            return False

    def analyze_text_sentiment(self, text: str) -> Dict:
        """Score one text with every analyzer and combine the results.

        Args:
            text: Raw text (e.g. a tweet).  Texts that are empty, shorter
                than 10 characters, or empty after cleaning are not analyzed.

        Returns:
            Dict with keys ``sentiment`` ("bullish" / "neutral" / "bearish"),
            ``score`` (weighted 0..1), ``confidence`` (mean of the analyzers'
            confidences), ``urgency``, ``keywords``, ``models_used`` and
            ``text_snippet``.  On any failure the neutral default from
            ``_default_sentiment`` is returned.
        """
        if not text or len(text.strip()) < 10:
            return self._default_sentiment()

        try:
            cleaned_text = self._clean_text(text)
            # A text made only of URLs/mentions has nothing left to score.
            if not cleaned_text:
                return self._default_sentiment()

            financial_sentiment = self._analyze_financial(cleaned_text)
            general_sentiment = self._analyze_general(cleaned_text)
            crypto_sentiment = self._analyze_crypto(cleaned_text)
            vader_sentiment = self._analyze_vader(cleaned_text)
            textblob_sentiment = self._analyze_textblob(cleaned_text)

            # (score, weight) pairs; the weights sum to 1.0.
            sentiments = [
                (financial_sentiment['score'], 0.3),
                (general_sentiment['score'], 0.2),
                (crypto_sentiment['score'], 0.25),
                (vader_sentiment['compound'], 0.15),
                (textblob_sentiment['polarity'], 0.1)
            ]

            weighted_score = sum(score * weight for score, weight in sentiments)
            confidence = np.mean([
                financial_sentiment['confidence'],
                general_sentiment['confidence'],
                crypto_sentiment['confidence'],
                vader_sentiment['confidence'],
                textblob_sentiment['confidence']
            ])

            if weighted_score > 0.6:
                sentiment_label = "bullish"
            elif weighted_score > 0.4:
                sentiment_label = "neutral"
            else:
                sentiment_label = "bearish"

            keywords = self._extract_keywords(cleaned_text)
            urgency = self._detect_urgency(cleaned_text)

            return {
                "sentiment": sentiment_label,
                "score": float(weighted_score),
                "confidence": float(confidence),
                "urgency": urgency,
                "keywords": keywords,
                # Counts analyzers whose score differs from the neutral 0.5
                # (a failed analyzer also reports exactly 0.5).
                "models_used": len([s for s in sentiments if s[0] != 0.5]),
                "text_snippet": cleaned_text[:100] + "..." if len(cleaned_text) > 100 else cleaned_text
            }

        except Exception as e:
            print(f"Error in sentiment analysis: {e}")
            return self._default_sentiment()

    def _run_model(self, model_key: str, text: str) -> Dict:
        """Run one loaded pipeline and map its top label to a 0..1 score.

        Returns ``{'score': 0.5, 'confidence': 0.0}`` when the model is not
        loaded or the call fails, so one broken model never aborts the
        whole ensemble.
        """
        try:
            result = self.sentiment_models[model_key](text)[0]
            return {
                'score': self._LABEL_SCORES.get(str(result['label']).lower(), 0.5),
                'confidence': float(result['score'])
            }
        except Exception:
            return {'score': 0.5, 'confidence': 0.0}

    def _analyze_financial(self, text: str) -> Dict:
        """Analyze with the financial-news sentiment model."""
        return self._run_model('financial', text)

    def _analyze_general(self, text: str) -> Dict:
        """Analyze with the general-purpose sentiment model."""
        return self._run_model('general', text)

    def _analyze_crypto(self, text: str) -> Dict:
        """Analyze with the crypto-specific model (or its fallback)."""
        return self._run_model('crypto', text)

    def _analyze_vader(self, text: str) -> Dict:
        """Analyze with VADER; the compound score is rescaled to 0..1."""
        try:
            scores = self.vader_analyzer.polarity_scores(text)
            return {
                'compound': (scores['compound'] + 1) / 2,  # -1..1 -> 0..1
                'confidence': abs(scores['compound'])
            }
        except Exception:
            return {'compound': 0.5, 'confidence': 0.0}

    def _analyze_textblob(self, text: str) -> Dict:
        """Analyze with TextBlob; the polarity is rescaled to 0..1."""
        try:
            analysis = TextBlob(text)
            return {
                'polarity': (analysis.sentiment.polarity + 1) / 2,  # -1..1 -> 0..1
                'confidence': abs(analysis.sentiment.polarity)
            }
        except Exception:
            return {'polarity': 0.5, 'confidence': 0.0}

    def _clean_text(self, text: str) -> str:
        """Strip URLs and @mentions, drop '#' (keeping the tag word), and
        collapse runs of whitespace."""
        text = re.sub(r'http\S+', '', text)
        text = re.sub(r'@\w+', '', text)
        text = re.sub(r'#', '', text)
        return ' '.join(text.split()).strip()

    @staticmethod
    def _has_term(text_lower: str, term: str) -> bool:
        """Return True when ``term`` occurs in ``text_lower``.

        Alphanumeric terms must begin at a word boundary so that "eth" no
        longer matches "something" and "now" no longer matches "know",
        while inflections such as "bullish" or "selling" still match.
        Punctuation terms (e.g. "!") use a plain substring test.
        """
        if term[:1].isalnum():
            return re.search(r'\b' + re.escape(term), text_lower) is not None
        return term in text_lower

    def _extract_keywords(self, text: str) -> List[str]:
        """Return up to five ``"category:keyword"`` tags found in ``text``."""
        financial_keywords = {
            'bullish': ['moon', 'rocket', 'bull', 'buy', 'long', 'growth', 'opportunity'],
            'bearish': ['crash', 'bear', 'sell', 'short', 'drop', 'warning', 'risk'],
            'crypto': ['bitcoin', 'btc', 'ethereum', 'eth', 'crypto', 'blockchain', 'defi'],
            'urgency': ['now', 'urgent', 'immediately', 'alert', 'breaking']
        }

        text_lower = text.lower()
        found_keywords = [
            f"{category}:{keyword}"
            for category, keywords in financial_keywords.items()
            for keyword in keywords
            if self._has_term(text_lower, keyword)
        ]
        # Only the first five matches, in category/keyword declaration order.
        return found_keywords[:5]

    def _detect_urgency(self, text: str) -> float:
        """Return an urgency score in 0..1.

        Each indicator present adds 0.2; exclamation marks add a further
        0.1 each, capped at 0.3; the total is capped at 1.0.
        """
        urgency_indicators = ['!', 'urgent', 'breaking', 'alert', 'immediately', 'now']
        text_lower = text.lower()

        urgency_score = 0.0
        for indicator in urgency_indicators:
            if self._has_term(text_lower, indicator):
                urgency_score += 0.2

        urgency_score += min(text.count('!') * 0.1, 0.3)
        return min(urgency_score, 1.0)

    def _default_sentiment(self) -> Dict:
        """Return the neutral, zero-confidence result used when analysis is
        skipped or fails."""
        return {
            "sentiment": "neutral",
            "score": 0.5,
            "confidence": 0.0,
            "urgency": 0.0,
            "keywords": [],
            "models_used": 0,
            "text_snippet": ""
        }

    def get_influencer_sentiment(self, hours_back: int = 24) -> Dict:
        """Aggregate sentiment over synthetic tweets of all influencers.

        Args:
            hours_back: Maximum tweet age, in hours, for the simulated
                tweets.

        Returns:
            Dict with the weight-averaged ``market_sentiment`` (0.5 when no
            data), mean ``confidence``, ``influencer_count``,
            ``total_tweets``, the per-user ``breakdown`` and an ISO
            ``timestamp`` of the computation.
        """
        all_tweets = self._generate_synthetic_tweets(hours_back)
        influencer_sentiments = {}

        for username, tweet_batch in all_tweets.items():
            tweet_sentiments = []
            for tweet in tweet_batch:
                sentiment = self.analyze_text_sentiment(tweet['text'])
                sentiment['timestamp'] = tweet['timestamp']
                sentiment['username'] = username
                tweet_sentiments.append(sentiment)

            if not tweet_sentiments:
                continue

            # The batch is not generated in strict time order, so sort
            # before taking the two most recent tweets.
            newest_first = sorted(
                tweet_sentiments, key=lambda s: s['timestamp'], reverse=True
            )
            influencer_sentiments[username] = {
                'score': float(np.mean([t['score'] for t in tweet_sentiments])),
                'confidence': float(np.mean([t['confidence'] for t in tweet_sentiments])),
                'weight': self.influencers[username]['weight'],
                'tweet_count': len(tweet_sentiments),
                'recent_tweets': newest_first[:2]
            }

        # Weighted market sentiment; neutral when nothing could be scored.
        total_weight = sum(d['weight'] for d in influencer_sentiments.values())
        if total_weight > 0:
            market_sentiment = sum(
                d['score'] * d['weight'] for d in influencer_sentiments.values()
            ) / total_weight
        else:
            market_sentiment = 0.5

        if influencer_sentiments:
            overall_confidence = float(
                np.mean([d['confidence'] for d in influencer_sentiments.values()])
            )
        else:
            overall_confidence = 0.0

        return {
            "market_sentiment": float(market_sentiment),
            "confidence": overall_confidence,
            "influencer_count": len(influencer_sentiments),
            "total_tweets": sum(d['tweet_count'] for d in influencer_sentiments.values()),
            "breakdown": influencer_sentiments,
            "timestamp": datetime.now().isoformat()
        }

    def _generate_synthetic_tweets(self, hours_back: int) -> Dict:
        """Generate random synthetic tweets for every known influencer.

        A sine wave over wall-clock time simulates the market trend; each
        user gets 2-5 template tweets.  Tweet ages are clamped so that no
        timestamp is older than ``hours_back`` hours.

        Returns:
            Dict mapping username to a list of ``{'text', 'timestamp'}``
            dicts (timestamps are epoch seconds).
        """
        current_time = time.time()
        max_age_seconds = max(0.0, float(hours_back)) * 3600
        tweets = {}

        # Oscillating trend in the range 0.2..0.8.
        market_trend = np.sin(current_time / 3600) * 0.3 + 0.5
        emoji_suffixes = ["🚀", "📈", "📉", "💎", "🔥"]

        for username in self.influencers:
            base_sentiment = market_trend + np.random.normal(0, 0.1)
            base_sentiment = max(0.1, min(0.9, base_sentiment))
            tweet_templates = self._get_user_templates(username, base_sentiment)

            user_tweets = []
            for i in range(np.random.randint(2, 6)):  # 2-5 tweets per user
                # Pick by index so no NumPy array is built from dicts/strings.
                template = tweet_templates[np.random.randint(len(tweet_templates))]
                tweet_text = template['text']

                # Occasionally append a random emoji for variety.
                if np.random.random() < 0.3:
                    tweet_text += " " + emoji_suffixes[np.random.randint(len(emoji_suffixes))]

                age_seconds = min(i * 3600 * np.random.uniform(1, 4), max_age_seconds)
                user_tweets.append({
                    'text': tweet_text,
                    'timestamp': float(current_time - age_seconds)
                })

            tweets[username] = user_tweets

        return tweets

    def _get_user_templates(self, username: str, base_sentiment: float) -> List[Dict]:
        """Return tweet templates for ``username`` matching the sentiment.

        ``base_sentiment`` above 0.6 selects bullish templates, below 0.4
        bearish ones, otherwise neutral ones.  Users without personalised
        templates get generic defaults.

        Returns:
            List of ``{'text': <template>}`` dicts.
        """
        bullish_templates = {
            'elonmusk': [
                "The future is bright for digital assets! 🚀",
                "Adoption is accelerating faster than expected 📈",
                "Just added more to my position 💪",
                "Technology is evolving at an incredible pace 🌟"
            ],
            'cz_binance': [
                "Strong fundamentals in the crypto space 📊",
                "Building for the next billion users 🏗️",
                "Innovation continues across the ecosystem 🔄",
                "Positive regulatory developments emerging ⚖️"
            ],
            'saylor': [
                "Bitcoin represents digital excellence 💎",
                "The macroeconomic picture supports growth 📈",
                "Institutional adoption is accelerating 🏦",
                "Technology is the future of finance 🔮"
            ]
        }

        bearish_templates = {
            'elonmusk': [
                "Market conditions looking challenging 🌧️",
                "Need to see more adoption for sustained growth 📉",
                "Regulatory concerns are weighing on sentiment ⚖️",
                "Volatility is higher than expected 📊"
            ],
            'cz_binance': [
                "Market experiencing normal corrections 📉",
                "Important to manage risk in current environment 🛡️",
                "Short-term volatility doesn't change long-term thesis 🔄",
                "Focus on fundamentals over price action 📊"
            ],
            'saylor': [
                "Short-term price action doesn't matter for long-term holders 💎",
                "Focus on the technology, not the noise 🔇",
                "Market cycles are normal and expected 🔄",
                "Education is key during volatile periods 📚"
            ]
        }

        neutral_templates = {
            'elonmusk': [
                "Interesting developments in the space 🤔",
                "Keeping an eye on market movements 👀",
                "Technology continues to evolve 🔧",
                "The journey continues 🛣️"
            ],
            'cz_binance': [
                "Monitoring market conditions 📊",
                "Continuing to build through all markets 🏗️",
                "Focus on long-term development 🎯",
                "Ecosystem growth continues 🌱"
            ],
            'saylor': [
                "Bitcoin education is important 📖",
                "Understanding the technology is key 🔑",
                "Market cycles are part of growth 🔄",
                "Focus on the fundamentals 📊"
            ]
        }

        # Default templates for users without personalised ones.
        default_templates = {
            'bullish': ["Market looking good!", "Positive developments ahead", "Growth continues"],
            'bearish': ["Market challenges ahead", "Caution advised", "Volatility expected"],
            'neutral': ["Monitoring developments", "Interesting times", "Continuing to watch"]
        }

        if base_sentiment > 0.6:
            templates = bullish_templates.get(username, default_templates['bullish'])
        elif base_sentiment < 0.4:
            templates = bearish_templates.get(username, default_templates['bearish'])
        else:
            templates = neutral_templates.get(username, default_templates['neutral'])

        return [{'text': template} for template in templates]