codealchemist01 commited on
Commit
f0fe12a
·
verified ·
1 Parent(s): 20fd239

🔧 Fix: Yerel uygulamayla tam uyumlu hale getir - model isimleri ve ensemble weights düzeltildi

Browse files
Files changed (1) hide show
  1. app.py +460 -473
app.py CHANGED
@@ -1,473 +1,460 @@
1
- #!/usr/bin/env python3
2
- """
3
- Financial Sentiment Analysis - Enhanced Ensemble Gradio Demo for Hugging Face Space
4
- 3-Model Ensemble System with Rule Engine
5
- """
6
-
7
- import gradio as gr
8
- import torch
9
- import numpy as np
10
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
11
- import logging
12
- import re
13
- from typing import Dict, List, Tuple
14
-
15
- # Logging setup
16
- logging.basicConfig(level=logging.INFO)
17
- logger = logging.getLogger(__name__)
18
-
19
- class SentimentRuleEngine:
20
- """Rule-based post-processing for sentiment analysis"""
21
-
22
- def __init__(self):
23
- # Strong bullish keywords with weights
24
- self.bullish_keywords = {
25
- 'soaring': 0.9, 'skyrocketing': 0.9, 'surging': 0.9, 'exploding': 0.9,
26
- 'excellent': 0.8, 'outstanding': 0.8, 'exceptional': 0.8, 'amazing': 0.8,
27
- 'breakthrough': 0.8, 'revolutionary': 0.8, 'record-breaking': 0.9,
28
- 'all-time high': 0.9, 'new high': 0.8, 'moon': 0.8, 'rocket': 0.8,
29
- 'mooning': 0.9, 'rocketing': 0.8, 'booming': 0.7, 'thriving': 0.7,
30
- 'up 10%': 0.8, 'up 15%': 0.9, 'up 20%': 0.9, 'gained 10%': 0.8,
31
- 'rose 15%': 0.8, 'jumped 20%': 0.9, 'spiked': 0.8, 'surged': 0.8,
32
- 'rising': 0.6, 'climbing': 0.6, 'gaining': 0.6, 'growing': 0.6,
33
- 'strong': 0.5, 'solid': 0.5, 'robust': 0.5, 'healthy': 0.5,
34
- 'positive': 0.4, 'optimistic': 0.5, 'bullish': 0.8, 'rally': 0.7,
35
- 'beat': 0.7, 'exceeded': 0.7, 'outperformed': 0.7, 'success': 0.6,
36
- 'profit': 0.3, 'earnings': 0.2, 'revenue': 0.2, 'growth': 0.5
37
- }
38
-
39
- # Strong bearish keywords with weights
40
- self.bearish_keywords = {
41
- 'crashing': 0.9, 'plummeting': 0.9, 'collapsing': 0.9, 'tanking': 0.9,
42
- 'disaster': 0.8, 'terrible': 0.8, 'awful': 0.8, 'horrible': 0.8,
43
- 'crisis': 0.7, 'recession': 0.8, 'bankruptcy': 0.9, 'failed': 0.7,
44
- 'down 10%': 0.8, 'down 15%': 0.9, 'down 20%': 0.9, 'lost 10%': 0.8,
45
- 'fell 15%': 0.8, 'dropped 20%': 0.9, 'plunged': 0.8, 'tumbled': 0.7,
46
- 'falling': 0.6, 'declining': 0.6, 'dropping': 0.6, 'losing': 0.6,
47
- 'weak': 0.5, 'poor': 0.5, 'bad': 0.4, 'negative': 0.4,
48
- 'bearish': 0.8, 'selloff': 0.7, 'sell-off': 0.7, 'correction': 0.6,
49
- 'missed': 0.6, 'disappointed': 0.6, 'concerns': 0.4, 'worried': 0.5
50
- }
51
-
52
- # Neutral keywords that should reduce extreme predictions
53
- self.neutral_keywords = {
54
- 'mixed': 0.7, 'uncertain': 0.6, 'unclear': 0.6, 'sideways': 0.8,
55
- 'flat': 0.7, 'stable': 0.5, 'unchanged': 0.8, 'waiting': 0.6,
56
- 'consolidating': 0.7, 'range-bound': 0.8, 'choppy': 0.7
57
- }
58
-
59
- def extract_keywords(self, text: str) -> Dict[str, float]:
60
- """Extract sentiment keywords and their weights from text"""
61
- text_lower = text.lower()
62
- found_keywords = {'bullish': [], 'bearish': [], 'neutral': []}
63
-
64
- # Check for bullish keywords
65
- for keyword, weight in self.bullish_keywords.items():
66
- if keyword in text_lower:
67
- found_keywords['bullish'].append((keyword, weight))
68
-
69
- # Check for bearish keywords
70
- for keyword, weight in self.bearish_keywords.items():
71
- if keyword in text_lower:
72
- found_keywords['bearish'].append((keyword, weight))
73
-
74
- # Check for neutral keywords
75
- for keyword, weight in self.neutral_keywords.items():
76
- if keyword in text_lower:
77
- found_keywords['neutral'].append((keyword, weight))
78
-
79
- return found_keywords
80
-
81
- def apply_rules(self, text: str, model_probabilities: np.ndarray,
82
- confidence_threshold: float = 0.7) -> Tuple[np.ndarray, str]:
83
- """Apply rule-based post-processing to model predictions"""
84
-
85
- original_probs = model_probabilities.copy()
86
- adjusted_probs = model_probabilities.copy()
87
-
88
- # Extract keywords
89
- keywords = self.extract_keywords(text)
90
-
91
- # Calculate keyword scores
92
- bullish_score = sum(weight for _, weight in keywords['bullish'])
93
- bearish_score = sum(weight for _, weight in keywords['bearish'])
94
- neutral_score = sum(weight for _, weight in keywords['neutral'])
95
-
96
- explanation_parts = []
97
-
98
- # Apply adjustments based on keyword scores
99
- if bullish_score > 0.5:
100
- # Boost bullish probability
101
- boost = min(0.3, bullish_score * 0.2)
102
- adjusted_probs[2] += boost # Bullish
103
- adjusted_probs[0] = max(0.05, adjusted_probs[0] - boost/2) # Bearish
104
- adjusted_probs[1] = max(0.05, adjusted_probs[1] - boost/2) # Neutral
105
- explanation_parts.append(f"Bullish keywords detected (score: {bullish_score:.2f})")
106
-
107
- if bearish_score > 0.5:
108
- # Boost bearish probability
109
- boost = min(0.3, bearish_score * 0.2)
110
- adjusted_probs[0] += boost # Bearish
111
- adjusted_probs[2] = max(0.05, adjusted_probs[2] - boost/2) # Bullish
112
- adjusted_probs[1] = max(0.05, adjusted_probs[1] - boost/2) # Neutral
113
- explanation_parts.append(f"Bearish keywords detected (score: {bearish_score:.2f})")
114
-
115
- if neutral_score > 0.5:
116
- # Boost neutral probability
117
- boost = min(0.2, neutral_score * 0.15)
118
- adjusted_probs[1] += boost # Neutral
119
- adjusted_probs[0] = max(0.05, adjusted_probs[0] - boost/2) # Bearish
120
- adjusted_probs[2] = max(0.05, adjusted_probs[2] - boost/2) # Bullish
121
- explanation_parts.append(f"Neutral keywords detected (score: {neutral_score:.2f})")
122
-
123
- # Normalize probabilities
124
- adjusted_probs = adjusted_probs / np.sum(adjusted_probs)
125
-
126
- # Create explanation
127
- if explanation_parts:
128
- explanation = "Applied: " + ", ".join(explanation_parts)
129
- else:
130
- explanation = "No significant keywords detected"
131
-
132
- return adjusted_probs, explanation
133
-
134
- # Initialize rule engine
135
- rule_engine = SentimentRuleEngine()
136
-
137
- class FinancialSentimentEnsemble:
138
- """Ensemble model for financial sentiment analysis using Hugging Face models"""
139
-
140
- def __init__(self):
141
- self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
142
- self.models = {}
143
- self.tokenizers = {}
144
- self.label_names = ["Bearish 📉", "Neutral ⚖️", "Bullish 📈"]
145
-
146
- # Hugging Face model configurations
147
- self.model_configs = {
148
- "distilbert": {
149
- "name": "DistilBERT (Fast)",
150
- "repo_id": "codealchemist01/financial-sentiment-distilbert",
151
- "description": "Fast and efficient model"
152
- },
153
- "bert_large": {
154
- "name": "BERT-Large (Advanced)",
155
- "repo_id": "codealchemist01/financial-sentiment-bert-large",
156
- "description": "Most advanced model"
157
- },
158
- "improved": {
159
- "name": "Improved Model",
160
- "repo_id": "codealchemist01/financial-sentiment-improved",
161
- "description": "Enhanced model with advanced training"
162
- }
163
- }
164
-
165
- # Ensemble weights for different combinations
166
- self.ensemble_weights = {
167
- "smart_ensemble": {"distilbert": 0.3, "bert_large": 0.7},
168
- "all_models": {"distilbert": 0.2, "improved": 0.3, "bert_large": 0.5}
169
- }
170
-
171
- self.load_models()
172
-
173
- def load_models(self):
174
- """Load models from Hugging Face Hub"""
175
- loaded_models = []
176
-
177
- for model_key, config in self.model_configs.items():
178
- try:
179
- logger.info(f"Loading {config['name']} from {config['repo_id']}")
180
-
181
- tokenizer = AutoTokenizer.from_pretrained(config["repo_id"])
182
- model = AutoModelForSequenceClassification.from_pretrained(config["repo_id"])
183
- model.to(self.device)
184
- model.eval()
185
-
186
- self.tokenizers[model_key] = tokenizer
187
- self.models[model_key] = model
188
- loaded_models.append(config["name"])
189
-
190
- logger.info(f"✅ {config['name']} loaded successfully")
191
-
192
- except Exception as e:
193
- logger.error(f"❌ Error loading {config['name']}: {e}")
194
-
195
- logger.info(f"🎯 Total loaded models: {len(loaded_models)}")
196
- return loaded_models
197
-
198
- def predict_single_model(self, text, model_key):
199
- """Get prediction from a single model"""
200
- if model_key not in self.models:
201
- return None, f"Model {model_key} not available"
202
-
203
- try:
204
- tokenizer = self.tokenizers[model_key]
205
- model = self.models[model_key]
206
-
207
- inputs = tokenizer(
208
- text,
209
- return_tensors="pt",
210
- truncation=True,
211
- padding=True,
212
- max_length=512
213
- )
214
- inputs = {k: v.to(self.device) for k, v in inputs.items()}
215
-
216
- with torch.no_grad():
217
- outputs = model(**inputs)
218
- probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
219
- probabilities = probabilities.cpu().numpy()[0]
220
-
221
- return probabilities, None
222
-
223
- except Exception as e:
224
- return None, f"Error in {model_key}: {str(e)}"
225
-
226
- def predict_ensemble(self, text, ensemble_type="smart_ensemble", use_rules=True):
227
- """Make ensemble prediction"""
228
- if not text.strip():
229
- return "Please enter some text to analyze.", {}, ""
230
-
231
- try:
232
- # Determine which models to use
233
- if ensemble_type == "smart_ensemble":
234
- weights = self.ensemble_weights["smart_ensemble"]
235
- models_to_use = ["distilbert", "bert_large"]
236
- elif ensemble_type == "all_models":
237
- weights = self.ensemble_weights["all_models"]
238
- models_to_use = ["distilbert", "improved", "bert_large"]
239
- else:
240
- # Single model prediction
241
- models_to_use = [ensemble_type]
242
- weights = {ensemble_type: 1.0}
243
-
244
- # Get predictions from each model
245
- ensemble_probabilities = np.zeros(3)
246
- total_weight = 0
247
- model_predictions = {}
248
- model_details = []
249
-
250
- for model_key in models_to_use:
251
- if model_key in self.models:
252
- probabilities, error = self.predict_single_model(text, model_key)
253
- if probabilities is not None:
254
- weight = weights.get(model_key, 1.0)
255
- ensemble_probabilities += probabilities * weight
256
- total_weight += weight
257
-
258
- # Store individual results
259
- predicted_class = np.argmax(probabilities)
260
- confidence = probabilities[predicted_class]
261
- model_predictions[model_key] = {
262
- "prediction": self.label_names[predicted_class],
263
- "confidence": float(confidence),
264
- "probabilities": probabilities.tolist()
265
- }
266
-
267
- model_details.append(
268
- f"**{self.model_configs[model_key]['name']}:** "
269
- f"{self.label_names[predicted_class]} ({confidence:.2%})"
270
- )
271
-
272
- if total_weight == 0:
273
- return "No models available for prediction.", {}, ""
274
-
275
- # Normalize ensemble probabilities
276
- ensemble_probabilities = ensemble_probabilities / total_weight
277
-
278
- # Apply rule-based post-processing if enabled
279
- rule_explanation = ""
280
- if use_rules:
281
- ensemble_probabilities, rule_explanation = rule_engine.apply_rules(
282
- text, ensemble_probabilities, confidence_threshold=0.7
283
- )
284
-
285
- # Get final prediction
286
- predicted_class = np.argmax(ensemble_probabilities)
287
- confidence = ensemble_probabilities[predicted_class]
288
-
289
- # Create detailed results
290
- if len(models_to_use) > 1:
291
- result_text = f"**🎯 Ensemble Prediction:** {self.label_names[predicted_class]}\\n"
292
- result_text += f"**🔥 Ensemble Confidence:** {confidence:.2%}\\n\\n"
293
-
294
- result_text += "**🤖 Individual Model Results:**\\n"
295
- for detail in model_details:
296
- result_text += f"- {detail}\\n"
297
- result_text += "\\n"
298
- else:
299
- result_text = f"**🎯 Prediction:** {self.label_names[predicted_class]}\\n"
300
- result_text += f"**🔥 Confidence:** {confidence:.2%}\\n\\n"
301
-
302
- # Show rule engine effects if applied
303
- if use_rules and rule_explanation:
304
- result_text += f"**🤖 Rule Engine:** {rule_explanation}\\n\\n"
305
-
306
- result_text += "**📊 Final Probabilities:**\\n"
307
-
308
- # Create probability dictionary for gradio
309
- prob_dict = {}
310
- for i, (label, prob) in enumerate(zip(self.label_names, ensemble_probabilities)):
311
- prob_dict[label] = float(prob)
312
- result_text += f"- {label}: {prob:.2%}\\n"
313
-
314
- # Create model comparison details
315
- comparison_details = ""
316
- if len(model_predictions) > 1:
317
- comparison_details = "**🔍 Model Comparison:**\\n"
318
- for model_key, pred_data in model_predictions.items():
319
- comparison_details += f"\\n**{self.model_configs[model_key]['name']}:**\\n"
320
- for i, (label, prob) in enumerate(zip(self.label_names, pred_data['probabilities'])):
321
- comparison_details += f" - {label}: {prob:.2%}\\n"
322
-
323
- return result_text, prob_dict, comparison_details
324
-
325
- except Exception as e:
326
- logger.error(f"Prediction error: {e}")
327
- return f"Error during prediction: {str(e)}", {}, ""
328
-
329
- # Initialize ensemble model
330
- try:
331
- ensemble = FinancialSentimentEnsemble()
332
- available_models = list(ensemble.models.keys())
333
- gpu_info = f"🚀 **Models loaded:** {len(available_models)} models on {ensemble.device}"
334
- except Exception as e:
335
- gpu_info = f" **Error loading models:** {str(e)}"
336
- ensemble = None
337
- available_models = []
338
-
339
- def analyze_sentiment(text, model_selection, use_rules):
340
- """Main analysis function"""
341
- if ensemble is None:
342
- return "Models not loaded. Please check the error above.", {}, ""
343
-
344
- return ensemble.predict_ensemble(text, model_selection, use_rules)
345
-
346
- # Example texts for testing
347
- examples = [
348
- ["Tesla stock is soaring after excellent Q3 earnings report! 🚀", "smart_ensemble", True],
349
- ["The market is showing mixed signals today, uncertain direction.", "smart_ensemble", True],
350
- ["Major selloff expected as inflation concerns grow. Bearish outlook.", "all_models", True],
351
- ["Apple announces new iPhone with revolutionary features!", "distilbert", False],
352
- ["Economic indicators suggest potential recession ahead.", "bert_large", True],
353
- ["Crypto market rebounds strongly after recent dip.", "smart_ensemble", True]
354
- ]
355
-
356
- # Create Gradio interface
357
- with gr.Blocks(
358
- title="Financial Sentiment Analysis - Ensemble System",
359
- theme=gr.themes.Soft(),
360
- css="""
361
- .gradio-container {
362
- max-width: 1000px !important;
363
- margin: auto !important;
364
- }
365
- .header {
366
- text-align: center;
367
- padding: 20px;
368
- background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
369
- color: white;
370
- border-radius: 10px;
371
- margin-bottom: 20px;
372
- }
373
- .model-info {
374
- background-color: #f8f9fa;
375
- padding: 15px;
376
- border-radius: 8px;
377
- margin: 10px 0;
378
- }
379
- """
380
- ) as demo:
381
-
382
- gr.HTML(f"""
383
- <div class="header">
384
- <h1>📈 Financial Sentiment Analysis Ensemble</h1>
385
- <h3>Advanced AI-powered sentiment analysis for financial texts using an ensemble of 3 fine-tuned models</h3>
386
- <p>{gpu_info}</p>
387
- </div>
388
- """)
389
-
390
- with gr.Row():
391
- with gr.Column(scale=2):
392
- text_input = gr.Textbox(
393
- label="📝 Enter Financial Text to Analyze",
394
- placeholder="Enter financial news, tweets, or market commentary...",
395
- lines=4
396
- )
397
-
398
- with gr.Row():
399
- model_selection = gr.Dropdown(
400
- choices=[
401
- ("🧠 Smart Ensemble (Recommended)", "smart_ensemble"),
402
- ("🎯 All Models Ensemble", "all_models"),
403
- ("⚡ DistilBERT (Fast)", "distilbert"),
404
- ("🔥 BERT-Large (Advanced)", "bert_large"),
405
- ("🚀 Improved Model", "improved")
406
- ],
407
- value="smart_ensemble",
408
- label="🤖 Model Selection"
409
- )
410
-
411
- use_rules = gr.Checkbox(
412
- label="🤖 Rule-Based Enhancement",
413
- value=True,
414
- info="Apply keyword-based post-processing"
415
- )
416
-
417
- analyze_btn = gr.Button("🔍 Analyze Sentiment", variant="primary", size="lg")
418
-
419
- with gr.Column(scale=2):
420
- result_output = gr.Textbox(
421
- label="📊 Analysis Results",
422
- lines=12,
423
- interactive=False
424
- )
425
-
426
- prob_output = gr.Label(
427
- label="📈 Probability Distribution",
428
- num_top_classes=3
429
- )
430
-
431
- with gr.Row():
432
- comparison_output = gr.Textbox(
433
- label="🔍 Model Comparison Details",
434
- lines=8,
435
- interactive=False,
436
- visible=True
437
- )
438
-
439
- # Event handlers
440
- analyze_btn.click(
441
- fn=analyze_sentiment,
442
- inputs=[text_input, model_selection, use_rules],
443
- outputs=[result_output, prob_output, comparison_output]
444
- )
445
-
446
- # Examples section
447
- gr.Examples(
448
- examples=examples,
449
- inputs=[text_input, model_selection, use_rules],
450
- outputs=[result_output, prob_output, comparison_output],
451
- fn=analyze_sentiment,
452
- cache_examples=False,
453
- label="💡 Try these examples:"
454
- )
455
-
456
- # Model information
457
- gr.HTML("""
458
- <div class="model-info">
459
- <h4>🤖 Ensemble System Information</h4>
460
- <ul>
461
- <li><strong>🧠 Smart Ensemble:</strong> DistilBERT + BERT-Large (Best balance of speed and accuracy)</li>
462
- <li><strong>🎯 All Models:</strong> DistilBERT + Improved + BERT-Large (Maximum consensus)</li>
463
- <li><strong>⚡ DistilBERT:</strong> Fast and efficient model optimized for real-time analysis</li>
464
- <li><strong>🔥 BERT-Large:</strong> Most advanced model with deep contextual understanding</li>
465
- <li><strong>🚀 Improved Model:</strong> Enhanced with advanced training techniques</li>
466
- </ul>
467
- <p><em>💡 Tip: Smart Ensemble provides the best balance of accuracy and performance!</em></p>
468
- <p><em>🤖 Rule Engine: Applies keyword-based post-processing to improve accuracy on financial texts</em></p>
469
- </div>
470
- """)
471
-
472
- if __name__ == "__main__":
473
- demo.launch()
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Financial Sentiment Analysis - Enhanced Ensemble Gradio Demo for Hugging Face Space
4
+ Yerel uygulamayla tam uyumlu versiyon
5
+ """
6
+
7
+ import gradio as gr
8
+ import torch
9
+ import numpy as np
10
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
11
+ import logging
12
+ import re
13
+ from typing import Dict, List, Tuple
14
+
15
+ # Logging setup
16
+ logging.basicConfig(level=logging.INFO)
17
+ logger = logging.getLogger(__name__)
18
+
19
class SentimentRuleEngine:
    """Rule-based post-processing for sentiment analysis.

    Scans the text for weighted bullish/bearish keywords and, when the model
    itself is not confident, shifts probability mass towards the class the
    keywords favour. Probabilities are indexed as
    ``[bearish, neutral, bullish]`` (index 0 is boosted for bearish text,
    index 2 for bullish text).
    """

    # Probability mass moved towards the favoured class when a rule fires.
    ADJUSTMENT_STRENGTH = 0.3
    # Net keyword score (absolute value) that must be exceeded to adjust.
    SCORE_THRESHOLD = 0.5

    def __init__(self):
        # Strong bullish keywords with weights
        self.bullish_keywords = {
            'soaring': 0.9, 'skyrocketing': 0.9, 'surging': 0.9, 'exploding': 0.9,
            'excellent': 0.8, 'outstanding': 0.8, 'exceptional': 0.8, 'amazing': 0.8,
            'breakthrough': 0.8, 'revolutionary': 0.8, 'record-breaking': 0.9,
            'all-time high': 0.9, 'new high': 0.8, 'moon': 0.8, 'rocket': 0.8,
            'mooning': 0.9, 'rocketing': 0.8, 'booming': 0.7, 'thriving': 0.7,
            'up 10%': 0.8, 'up 15%': 0.9, 'up 20%': 0.9, 'gained 10%': 0.8,
            'rose 15%': 0.8, 'jumped 20%': 0.9, 'spiked': 0.8, 'surged': 0.8,
            'rising': 0.6, 'climbing': 0.6, 'gaining': 0.6, 'growing': 0.6,
            'strong': 0.5, 'solid': 0.5, 'robust': 0.5, 'healthy': 0.5,
            'positive': 0.4, 'optimistic': 0.5, 'bullish': 0.8, 'rally': 0.7,
            'beat': 0.7, 'exceeded': 0.7, 'outperformed': 0.7, 'success': 0.6,
            'profit': 0.3, 'earnings': 0.2, 'revenue': 0.2, 'growth': 0.5
        }

        # Strong bearish keywords with weights
        self.bearish_keywords = {
            'crashing': 0.9, 'plummeting': 0.9, 'collapsing': 0.9, 'tanking': 0.9,
            'disaster': 0.8, 'terrible': 0.8, 'awful': 0.8, 'horrible': 0.8,
            'crisis': 0.7, 'recession': 0.8, 'bankruptcy': 0.9, 'failed': 0.7,
            'down 10%': 0.8, 'down 15%': 0.9, 'down 20%': 0.9, 'lost 10%': 0.8,
            'fell 15%': 0.8, 'dropped 20%': 0.9, 'plunged': 0.8, 'tumbled': 0.7,
            'falling': 0.6, 'declining': 0.6, 'dropping': 0.6, 'losing': 0.6,
            'weak': 0.5, 'poor': 0.5, 'bad': 0.4, 'negative': 0.4,
            'bearish': 0.8, 'selloff': 0.7, 'sell-off': 0.7, 'correction': 0.6,
            'missed': 0.6, 'disappointed': 0.6, 'concerns': 0.4, 'worried': 0.5
        }

    def extract_keywords(self, text: str) -> Dict[str, float]:
        """Return the sentiment keywords found in ``text`` with signed weights.

        Matching is a case-insensitive substring test. Bullish keywords map to
        their positive weight, bearish keywords to the negated weight.
        """
        text_lower = text.lower()
        found_keywords = {
            keyword: weight
            for keyword, weight in self.bullish_keywords.items()
            if keyword in text_lower
        }
        found_keywords.update(
            (keyword, -weight)  # Negative for bearish
            for keyword, weight in self.bearish_keywords.items()
            if keyword in text_lower
        )
        return found_keywords

    def apply_rules(self, text: str, model_probabilities: np.ndarray,
                    confidence_threshold: float = 0.7) -> Tuple[np.ndarray, str]:
        """Apply rule-based post-processing to a probability vector.

        Args:
            text: The analysed text.
            model_probabilities: Three probabilities ordered
                ``[bearish, neutral, bullish]``. The input is never modified.
            confidence_threshold: Rules only fire when the highest model
                probability is below this value.

        Returns:
            ``(probabilities, message)``: a new float array and a
            human-readable description of what the rule engine did. When no
            keyword is found the values are returned unchanged; otherwise they
            are clipped at zero and renormalised.
        """
        # Work on a private float copy: the adjustments below are in-place, and
        # callers may keep using (or comparing against) their own array.
        probs = np.array(model_probabilities, dtype=float)

        keywords = self.extract_keywords(text)
        if not keywords:
            return probs, "No significant keywords found"

        # Net score: bullish weights are positive, bearish weights negative.
        keyword_score = sum(keywords.values())

        # Model's own confidence
        max_prob = np.max(probs)

        # Apply rules only if model confidence is low
        if max_prob < confidence_threshold:
            boost = self.ADJUSTMENT_STRENGTH
            half = boost * 0.5  # taken from each of the two other classes

            if keyword_score > self.SCORE_THRESHOLD:  # Strong bullish keywords
                probs[2] += boost
                probs[0] -= half
                probs[1] -= half
                rule_msg = f"Bullish keywords detected (score: {keyword_score:.2f}), boosting bullish probability"
            elif keyword_score < -self.SCORE_THRESHOLD:  # Strong bearish keywords
                probs[0] += boost
                probs[1] -= half
                probs[2] -= half
                rule_msg = f"Bearish keywords detected (score: {keyword_score:.2f}), boosting bearish probability"
            else:
                rule_msg = f"Mixed signals (score: {keyword_score:.2f}), no adjustment"
        else:
            rule_msg = f"High model confidence ({max_prob:.2%}), rules not applied"

        # Clip negatives produced by the subtraction, then renormalise. Skip
        # the division for a degenerate all-zero vector to avoid NaNs.
        probs = np.maximum(probs, 0)
        total = np.sum(probs)
        if total > 0:
            probs = probs / total

        return probs, rule_msg


# Shared rule engine instance used by the ensemble predictor.
rule_engine = SentimentRuleEngine()
113
+
114
class EnsembleFinancialPredictor:
    """Weighted ensemble of Hugging Face classifiers for financial sentiment.

    Model keys and ensemble weights are kept identical to the local
    application. Class probabilities are ordered to match ``label_names``.
    """

    def __init__(self):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.models = {}
        self.tokenizers = {}
        self.label_names = ["Bearish 📉", "Neutral ⚖️", "Bullish 📈"]

        # Same model keys and configuration as the local application.
        self.model_info = {
            "distilbert": {
                "name": "DistilBERT (Fast)",
                "repo_id": "codealchemist01/financial-sentiment-distilbert",
                "description": "Hızlı ve verimli model (87.96% doğruluk)"
            },
            "balanced": {
                "name": "Balanced Model",
                # The "improved" checkpoint is served as the balanced model.
                "repo_id": "codealchemist01/financial-sentiment-improved",
                "description": "Dengeli performans modeli"
            },
            "advanced": {  # key is "advanced", as in the local application
                "name": "BERT-Large (Advanced)",
                "repo_id": "codealchemist01/financial-sentiment-bert-large",
                "description": "En gelişmiş model (85.85% doğruluk)"
            }
        }

        # Same ensemble weights as the local application.
        self.ensemble_weights = {
            "smart_ensemble": {"distilbert": 0.3, "advanced": 0.7},
            "all_models": {"distilbert": 0.2, "balanced": 0.3, "advanced": 0.5}
        }

        self.load_models()

    def load_models(self):
        """Load every configured model from the Hugging Face Hub.

        A model that fails to load is logged and skipped so the remaining
        models stay usable.

        Returns:
            List of display names of the models that loaded successfully.
        """
        loaded_models = []

        for model_key, info in self.model_info.items():
            try:
                logger.info(f"Loading {info['name']} from {info['repo_id']}")

                tokenizer = AutoTokenizer.from_pretrained(info["repo_id"])
                model = AutoModelForSequenceClassification.from_pretrained(info["repo_id"])
                model.to(self.device)
                model.eval()

                self.tokenizers[model_key] = tokenizer
                self.models[model_key] = model
                loaded_models.append(info["name"])

                logger.info(f" {info['name']} loaded successfully")

            except Exception as e:
                # Deliberate best-effort: one broken checkpoint must not take
                # the whole app down.
                logger.error(f"❌ Error loading {info['name']}: {e}")

        logger.info(f"🎯 Total loaded models: {len(loaded_models)}")
        return loaded_models

    def predict_single_model(self, text, model_key):
        """Run one model on ``text``.

        Returns:
            ``(probabilities, None)`` on success, where ``probabilities`` is a
            1-D NumPy array of softmax scores, or ``(None, error_message)``
            when the model is not loaded or inference fails.
        """
        if model_key not in self.models:
            return None, f"Model {model_key} not available"

        try:
            tokenizer = self.tokenizers[model_key]
            model = self.models[model_key]

            inputs = tokenizer(
                text,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=512
            )
            inputs = {k: v.to(self.device) for k, v in inputs.items()}

            with torch.no_grad():
                outputs = model(**inputs)
                probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
                probabilities = probabilities.cpu().numpy()[0]

            return probabilities, None

        except Exception as e:
            return None, f"Error in {model_key}: {str(e)}"

    def _select_models(self, ensemble_type):
        """Return ``(model_keys, weights)`` for an ensemble name or a single model key."""
        if ensemble_type in self.ensemble_weights:
            weights = self.ensemble_weights[ensemble_type]
            return list(weights), weights
        # Anything else is treated as a single-model request.
        return [ensemble_type], {ensemble_type: 1.0}

    def predict_ensemble(self, text, ensemble_type="smart_ensemble", use_rules=True):
        """Predict sentiment with a weighted ensemble (same logic as the local app).

        Args:
            text: Text to analyse; ``None`` or blank text yields a prompt message.
            ensemble_type: ``"smart_ensemble"``, ``"all_models"`` or a single
                model key from ``model_info``.
            use_rules: Whether to post-process with the keyword rule engine.

        Returns:
            ``(result_text, prob_dict, comparison_details)`` where
            ``prob_dict`` maps each label to its final probability. On any
            failure ``result_text`` carries the message and the other two
            values are empty.
        """
        if not text or not text.strip():
            return "Please enter some text to analyze.", {}, ""

        try:
            models_to_use, weights = self._select_models(ensemble_type)

            ensemble_probabilities = np.zeros(len(self.label_names))
            total_weight = 0
            model_predictions = {}
            model_details = []

            for model_key in models_to_use:
                if model_key not in self.models:
                    continue

                probabilities, error = self.predict_single_model(text, model_key)
                if probabilities is None:
                    # Do not drop inference failures silently.
                    logger.warning(f"Skipping {model_key}: {error}")
                    continue

                weight = weights.get(model_key, 1.0)
                ensemble_probabilities += probabilities * weight
                total_weight += weight

                # Keep the individual model result for the report.
                predicted_class = np.argmax(probabilities)
                confidence = probabilities[predicted_class]
                model_predictions[model_key] = {
                    "prediction": self.label_names[predicted_class],
                    "confidence": float(confidence),
                    "probabilities": probabilities.tolist()
                }
                model_details.append(
                    f"**{self.model_info[model_key]['name']}:** "
                    f"{self.label_names[predicted_class]} ({confidence:.2%})"
                )

            if total_weight == 0:
                return "No models available for prediction.", {}, ""

            # Weighted average over the models that actually answered.
            ensemble_probabilities = ensemble_probabilities / total_weight

            # Apply rule-based post-processing if enabled
            rule_explanation = ""
            if use_rules:
                ensemble_probabilities, rule_explanation = rule_engine.apply_rules(
                    text, ensemble_probabilities, confidence_threshold=0.7
                )

            # Final prediction
            predicted_class = np.argmax(ensemble_probabilities)
            confidence = ensemble_probabilities[predicted_class]

            # Assemble the report from parts and join once at the end.
            parts = []
            if len(models_to_use) > 1:
                parts.append(f"**🎯 Ensemble Prediction:** {self.label_names[predicted_class]}\n")
                parts.append(f"**🔥 Ensemble Confidence:** {confidence:.2%}\n\n")
                parts.append("**🤖 Individual Model Results:**\n")
                parts.extend(f"- {detail}\n" for detail in model_details)
                parts.append("\n")
            else:
                parts.append(f"**🎯 Prediction:** {self.label_names[predicted_class]}\n")
                parts.append(f"**🔥 Confidence:** {confidence:.2%}\n\n")

            # Show rule engine effects if applied
            if use_rules and rule_explanation:
                parts.append(f"**🤖 Rule Engine:** {rule_explanation}\n\n")

            parts.append("**📊 Final Probabilities:**\n")

            # Probability dictionary for the Gradio label component
            prob_dict = {}
            for label, prob in zip(self.label_names, ensemble_probabilities):
                prob_dict[label] = float(prob)
                parts.append(f"- {label}: {prob:.2%}\n")
            result_text = "".join(parts)

            # Per-model comparison, only meaningful with more than one model
            comparison_parts = []
            if len(model_predictions) > 1:
                comparison_parts.append("**🔍 Model Comparison:**\n")
                for model_key, pred_data in model_predictions.items():
                    comparison_parts.append(f"\n**{self.model_info[model_key]['name']}:**\n")
                    comparison_parts.extend(
                        f" - {label}: {prob:.2%}\n"
                        for label, prob in zip(self.label_names, pred_data['probabilities'])
                    )
            comparison_details = "".join(comparison_parts)

            return result_text, prob_dict, comparison_details

        except Exception as e:
            # Top-level boundary for the UI callback: log with traceback and
            # report the failure instead of raising into Gradio.
            logger.exception(f"Prediction error: {e}")
            return f"Error during prediction: {str(e)}", {}, ""
311
+
312
# Build the predictor once at import time. On failure the UI still starts and
# the error is shown in the page header instead of crashing the app.
# The status string is rendered through gr.HTML, so it uses HTML markup
# (<strong>) rather than Markdown asterisks, which would be shown literally.
try:
    predictor = EnsembleFinancialPredictor()
    available_models = list(predictor.models.keys())
    gpu_info = f"🚀 <strong>Models loaded:</strong> {len(available_models)} models on {predictor.device}"
except Exception as e:
    gpu_info = f"❌ <strong>Error loading models:</strong> {str(e)}"
    predictor = None
    available_models = []
321
+
322
def analyze_sentiment(text, model_selection, use_rules):
    """Gradio callback: run the selected model or ensemble on ``text``.

    Returns the ``(result_text, probabilities, comparison_details)`` triple
    produced by the predictor, or an explanatory message with empty
    placeholders when the predictor could not be created at start-up.
    """
    if predictor is not None:
        return predictor.predict_ensemble(text, model_selection, use_rules)

    return "Model not loaded. Please check the error above.", {}, ""
328
+
329
# Example inputs (same as the local application). Each row is
# [text, model selection, use rule engine].
examples = [
    list(row)
    for row in (
        ("Tesla stock is soaring after excellent Q3 earnings report! 🚀", "smart_ensemble", True),
        ("The market is showing mixed signals today, uncertain direction.", "smart_ensemble", True),
        ("Major selloff expected as inflation concerns grow. Bearish outlook.", "all_models", True),
        ("Apple announces new iPhone with revolutionary features!", "distilbert", False),
        # Uses the "advanced" model key (BERT-Large).
        ("Economic indicators suggest potential recession ahead.", "advanced", True),
        ("Crypto market rebounds strongly after recent dip.", "smart_ensemble", True),
    )
]
338
+
339
# Gradio interface (same look as the local application). Static markup and
# option lists are defined first so the layout below reads as pure structure.
_CUSTOM_CSS = """
.gradio-container {
    max-width: 1000px !important;
    margin: auto !important;
}
.header {
    text-align: center;
    padding: 20px;
    background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
    color: white;
    border-radius: 10px;
    margin-bottom: 20px;
}
.model-info {
    background-color: #f8f9fa;
    padding: 15px;
    border-radius: 8px;
    margin: 10px 0;
}
"""

# Page header; includes the model-loading status computed at start-up.
_HEADER_HTML = f"""
<div class="header">
    <h1>🏦 Financial Sentiment Analysis - Ensemble System</h1>
    <h3>🚀 3-Model Ensemble - Advanced AI Analysis</h3>
    <p>{gpu_info}</p>
</div>
"""

# (label shown to the user, value passed to analyze_sentiment)
_MODEL_CHOICES = [
    ("🧠 Smart Ensemble (Recommended)", "smart_ensemble"),
    ("🎯 All Models Ensemble", "all_models"),
    ("⚡ DistilBERT (Fast)", "distilbert"),
    ("⚖️ Balanced Model", "balanced"),
    ("🔥 BERT-Large (Advanced)", "advanced"),
]

# Static description of the available models (same as the local application).
_MODEL_INFO_HTML = """
<div class="model-info">
    <h4>🤖 Ensemble System Information</h4>
    <ul>
        <li><strong>🧠 Smart Ensemble:</strong> DistilBERT + BERT-Large (79.7% average accuracy)</li>
        <li><strong>🎯 All Models:</strong> DistilBERT + Balanced + BERT-Large (79.1% average accuracy)</li>
        <li><strong>⚡ DistilBERT:</strong> Fast and efficient (87.96% accuracy)</li>
        <li><strong>⚖️ Balanced Model:</strong> Optimized for balanced performance</li>
        <li><strong>🔥 BERT-Large:</strong> Most advanced model (85.85% accuracy)</li>
    </ul>
    <p><em>💡 Tip: Smart Ensemble provides the best balance of accuracy and performance!</em></p>
    <p><em>🤖 Rule Engine: Keyword-based post-processing improves accuracy on financial texts</em></p>
</div>
"""

with gr.Blocks(
    title="Financial Sentiment Analysis - Ensemble System",
    theme=gr.themes.Soft(),
    css=_CUSTOM_CSS,
) as demo:

    gr.HTML(_HEADER_HTML)

    with gr.Row():
        # Left column: inputs
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="📝 Financial Text to Analyze",
                placeholder="Enter financial news, tweets, or market commentary...",
                lines=4,
            )

            with gr.Row():
                model_selection = gr.Dropdown(
                    choices=_MODEL_CHOICES,
                    value="smart_ensemble",
                    label="🤖 Model Selection",
                )

                use_rules = gr.Checkbox(
                    label="🤖 Rule-Based Enhancement",
                    value=True,
                    info="Apply keyword-based post-processing",
                )

            analyze_btn = gr.Button("🔍 Analyze Sentiment", variant="primary", size="lg")

        # Right column: results
        with gr.Column(scale=2):
            result_output = gr.Textbox(
                label="📊 Analysis Results",
                lines=12,
                interactive=False,
            )

            prob_output = gr.Label(
                label="📈 Probability Distribution",
                num_top_classes=3,
            )

    with gr.Row():
        comparison_output = gr.Textbox(
            label="🔍 Model Comparison Details",
            lines=8,
            interactive=False,
            visible=True,
        )

    # The button and the example rows drive the same callback with the same
    # components, so the wiring is declared once.
    _callback_inputs = [text_input, model_selection, use_rules]
    _callback_outputs = [result_output, prob_output, comparison_output]

    # Event handlers
    analyze_btn.click(
        fn=analyze_sentiment,
        inputs=_callback_inputs,
        outputs=_callback_outputs,
    )

    # Examples
    gr.Examples(
        examples=examples,
        inputs=_callback_inputs,
        outputs=_callback_outputs,
        fn=analyze_sentiment,
        cache_examples=False,
    )

    # Model information
    gr.HTML(_MODEL_INFO_HTML)
453
+
454
if __name__ == "__main__":
    # Listen on all interfaces on port 7860, without a public share link, and
    # surface errors in the UI.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    demo.launch(**launch_options)