torxyton commited on
Commit
215b833
·
1 Parent(s): a53fe69

feat: Implementa sistema ensemble avançado de IA com múltiplos modelos

Browse files

- Adiciona EnsembleAI com FinBERT, DistilBERT, RoBERTa e BERT-Base
- Implementa sistema de votação inteligente com 6 estratégias
- Adiciona otimizador de performance com cache inteligente
- Integra processamento paralelo e métricas em tempo real
- Melhora precisão e velocidade das análises de sentimento

src/ai/__pycache__/ensemble_ai.cpython-313.pyc ADDED
Binary file (23.8 kB). View file
 
src/ai/__pycache__/performance_optimizer.cpython-313.pyc ADDED
Binary file (33.8 kB). View file
 
src/ai/__pycache__/voting_system.cpython-313.pyc ADDED
Binary file (26.5 kB). View file
 
src/ai/ensemble_ai.py ADDED
@@ -0,0 +1,493 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Sistema Ensemble de IA Avançado para Análise de Sentimento Financeiro
5
+ Combina múltiplos modelos para melhor precisão e confiabilidade
6
+ """
7
+
8
+ import asyncio
9
+ import logging
10
+ import time
11
+ from concurrent.futures import ThreadPoolExecutor, as_completed
12
+ from dataclasses import dataclass, asdict
13
+ from datetime import datetime, timedelta
14
+ from typing import Dict, List, Optional, Any, Tuple
15
+ from collections import defaultdict
16
+ import numpy as np
17
+ import torch
18
+ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
19
+ import warnings
20
+ warnings.filterwarnings('ignore')
21
+
22
+ # Importar sistema de otimização
23
+ try:
24
+ from .performance_optimizer import performance_optimizer, optimize_ai_analysis
25
+ except ImportError:
26
+ # Fallback se não conseguir importar
27
+ performance_optimizer = None
28
+
29
+ async def optimize_ai_analysis(func, text, use_cache=True):
30
+ return await func(text) if asyncio.iscoroutinefunction(func) else func(text)
31
+
32
+ import json
33
+ import hashlib
34
+ from functools import lru_cache
35
+
36
+ try:
37
+ from transformers import (
38
+ AutoTokenizer, AutoModelForSequenceClassification,
39
+ pipeline, BertTokenizer, BertForSequenceClassification
40
+ )
41
+ TRANSFORMERS_AVAILABLE = True
42
+ except ImportError:
43
+ TRANSFORMERS_AVAILABLE = False
44
+ logging.warning("Transformers não disponível. Sistema ensemble funcionará em modo limitado.")
45
+
46
+ try:
47
+ import torch
48
+ TORCH_AVAILABLE = True
49
+ except ImportError:
50
+ TORCH_AVAILABLE = False
51
+
52
+ @dataclass
53
+ class ModelPrediction:
54
+ """Resultado de predição de um modelo individual"""
55
+ model_name: str
56
+ confidence: float
57
+ prediction: str
58
+ sentiment_score: float
59
+ processing_time: float
60
+ metadata: Dict[str, Any]
61
+
62
+ @dataclass
63
+ class EnsembleResult:
64
+ """Resultado final do ensemble"""
65
+ final_prediction: str
66
+ confidence: float
67
+ sentiment_score: float
68
+ individual_predictions: List[ModelPrediction]
69
+ consensus_strength: float
70
+ processing_time: float
71
+ model_weights: Dict[str, float]
72
+
73
+ class ModelCache:
74
+ """Sistema de cache inteligente para otimizar performance"""
75
+
76
+ def __init__(self, max_size: int = 1000):
77
+ self.cache = {}
78
+ self.max_size = max_size
79
+ self.access_count = {}
80
+
81
+ def _generate_key(self, text: str, model_name: str) -> str:
82
+ """Gera chave única para cache"""
83
+ combined = f"{model_name}:{text}"
84
+ return hashlib.md5(combined.encode()).hexdigest()
85
+
86
+ def get(self, text: str, model_name: str) -> Optional[ModelPrediction]:
87
+ """Recupera resultado do cache"""
88
+ key = self._generate_key(text, model_name)
89
+ if key in self.cache:
90
+ self.access_count[key] = self.access_count.get(key, 0) + 1
91
+ return self.cache[key]
92
+ return None
93
+
94
+ def set(self, text: str, model_name: str, result: ModelPrediction):
95
+ """Armazena resultado no cache"""
96
+ if len(self.cache) >= self.max_size:
97
+ self._evict_least_used()
98
+
99
+ key = self._generate_key(text, model_name)
100
+ self.cache[key] = result
101
+ self.access_count[key] = 1
102
+
103
+ def _evict_least_used(self):
104
+ """Remove item menos usado do cache"""
105
+ if not self.cache:
106
+ return
107
+
108
+ least_used_key = min(self.access_count.keys(), key=lambda k: self.access_count[k])
109
+ del self.cache[least_used_key]
110
+ del self.access_count[least_used_key]
111
+
112
+ class AIModel:
113
+ """Classe base para modelos de IA"""
114
+
115
+ def __init__(self, name: str, model_path: str, weight: float = 1.0):
116
+ self.name = name
117
+ self.model_path = model_path
118
+ self.weight = weight
119
+ self.model = None
120
+ self.tokenizer = None
121
+ self.is_loaded = False
122
+
123
+ async def load_model(self):
124
+ """Carrega modelo de forma assíncrona"""
125
+ if not TRANSFORMERS_AVAILABLE:
126
+ logging.warning(f"Modelo {self.name} não pode ser carregado - Transformers indisponível")
127
+ return False
128
+
129
+ try:
130
+ loop = asyncio.get_event_loop()
131
+ with ThreadPoolExecutor() as executor:
132
+ future = loop.run_in_executor(executor, self._load_model_sync)
133
+ await future
134
+ self.is_loaded = True
135
+ logging.info(f"Modelo {self.name} carregado com sucesso")
136
+ return True
137
+ except Exception as e:
138
+ logging.error(f"Erro ao carregar modelo {self.name}: {e}")
139
+ return False
140
+
141
+ def _load_model_sync(self):
142
+ """Carregamento síncrono do modelo"""
143
+ self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
144
+ self.model = AutoModelForSequenceClassification.from_pretrained(self.model_path)
145
+
146
+ async def predict(self, text: str) -> ModelPrediction:
147
+ """Faz predição com o modelo"""
148
+ start_time = datetime.now()
149
+
150
+ if not self.is_loaded:
151
+ await self.load_model()
152
+
153
+ if not self.is_loaded:
154
+ # Fallback para modelo mock
155
+ return self._mock_prediction(text, start_time)
156
+
157
+ try:
158
+ loop = asyncio.get_event_loop()
159
+ with ThreadPoolExecutor() as executor:
160
+ future = loop.run_in_executor(executor, self._predict_sync, text)
161
+ result = await future
162
+
163
+ processing_time = (datetime.now() - start_time).total_seconds()
164
+
165
+ return ModelPrediction(
166
+ model_name=self.name,
167
+ confidence=result['confidence'],
168
+ prediction=result['prediction'],
169
+ sentiment_score=result['sentiment_score'],
170
+ processing_time=processing_time,
171
+ metadata=result.get('metadata', {})
172
+ )
173
+ except Exception as e:
174
+ logging.error(f"Erro na predição do modelo {self.name}: {e}")
175
+ return self._mock_prediction(text, start_time)
176
+
177
+ def _predict_sync(self, text: str) -> Dict[str, Any]:
178
+ """Predição síncrona"""
179
+ inputs = self.tokenizer(text, return_tensors="pt", truncation=True, padding=True)
180
+
181
+ with torch.no_grad() if TORCH_AVAILABLE else contextlib.nullcontext():
182
+ outputs = self.model(**inputs)
183
+ predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
184
+
185
+ confidence = float(torch.max(predictions))
186
+ predicted_class = int(torch.argmax(predictions))
187
+
188
+ # Mapear classe para sentimento
189
+ sentiment_map = {0: "NEGATIVO", 1: "NEUTRO", 2: "POSITIVO"}
190
+ prediction = sentiment_map.get(predicted_class, "NEUTRO")
191
+
192
+ # Calcular score de sentimento (-1 a 1)
193
+ sentiment_score = (predicted_class - 1) * confidence
194
+
195
+ return {
196
+ 'confidence': confidence,
197
+ 'prediction': prediction,
198
+ 'sentiment_score': sentiment_score,
199
+ 'metadata': {
200
+ 'predicted_class': predicted_class,
201
+ 'raw_predictions': predictions.tolist()
202
+ }
203
+ }
204
+
205
+ def _mock_prediction(self, text: str, start_time: datetime) -> ModelPrediction:
206
+ """Predição mock para fallback"""
207
+ processing_time = (datetime.now() - start_time).total_seconds()
208
+
209
+ # Análise simples baseada em palavras-chave
210
+ positive_words = ['alta', 'subida', 'compra', 'bull', 'positivo', 'ganho']
211
+ negative_words = ['baixa', 'queda', 'venda', 'bear', 'negativo', 'perda']
212
+
213
+ text_lower = text.lower()
214
+ pos_count = sum(1 for word in positive_words if word in text_lower)
215
+ neg_count = sum(1 for word in negative_words if word in text_lower)
216
+
217
+ if pos_count > neg_count:
218
+ prediction = "POSITIVO"
219
+ sentiment_score = 0.6
220
+ confidence = 0.7
221
+ elif neg_count > pos_count:
222
+ prediction = "NEGATIVO"
223
+ sentiment_score = -0.6
224
+ confidence = 0.7
225
+ else:
226
+ prediction = "NEUTRO"
227
+ sentiment_score = 0.0
228
+ confidence = 0.5
229
+
230
+ return ModelPrediction(
231
+ model_name=f"{self.name}_mock",
232
+ confidence=confidence,
233
+ prediction=prediction,
234
+ sentiment_score=sentiment_score,
235
+ processing_time=processing_time,
236
+ metadata={'method': 'keyword_analysis'}
237
+ )
238
+
239
+ class EnsembleAI:
240
+ """Sistema Ensemble de IA para Trading"""
241
+
242
+ def __init__(self):
243
+ self.models: List[AIModel] = []
244
+ self.cache = ModelCache()
245
+ self.performance_history = {}
246
+ self.logger = logging.getLogger(__name__)
247
+
248
+ # Inicializar modelos
249
+ self._initialize_models()
250
+
251
+ def _initialize_models(self):
252
+ """Inicializa os modelos do ensemble"""
253
+ model_configs = [
254
+ {
255
+ 'name': 'FinBERT',
256
+ 'path': 'ProsusAI/finbert',
257
+ 'weight': 1.2 # Peso maior para modelo especializado
258
+ },
259
+ {
260
+ 'name': 'DistilBERT-Financial',
261
+ 'path': 'distilbert-base-uncased',
262
+ 'weight': 1.0
263
+ },
264
+ {
265
+ 'name': 'RoBERTa-Sentiment',
266
+ 'path': 'cardiffnlp/twitter-roberta-base-sentiment-latest',
267
+ 'weight': 0.9
268
+ },
269
+ {
270
+ 'name': 'BERT-Base',
271
+ 'path': 'bert-base-uncased',
272
+ 'weight': 0.8
273
+ }
274
+ ]
275
+
276
+ for config in model_configs:
277
+ model = AIModel(
278
+ name=config['name'],
279
+ model_path=config['path'],
280
+ weight=config['weight']
281
+ )
282
+ self.models.append(model)
283
+
284
+ self.logger.info(f"Inicializados {len(self.models)} modelos no ensemble")
285
+
286
+ async def analyze_sentiment(self, text: str, use_cache: bool = True) -> EnsembleResult:
287
+ """Análise de sentimento usando ensemble de modelos com otimização"""
288
+ # Usar sistema de otimização se disponível
289
+ if performance_optimizer:
290
+ return await optimize_ai_analysis(
291
+ lambda t: self._analyze_sentiment_internal(t, use_cache),
292
+ text,
293
+ use_cache=use_cache
294
+ )
295
+ else:
296
+ return await self._analyze_sentiment_internal(text, use_cache)
297
+
298
+ async def _analyze_sentiment_internal(self, text: str, use_cache: bool = True) -> EnsembleResult:
299
+ """Implementação interna da análise de sentimento"""
300
+ start_time = datetime.now()
301
+
302
+ # Verificar cache primeiro
303
+ if use_cache:
304
+ cached_results = []
305
+ for model in self.models:
306
+ cached = self.cache.get(text, model.name)
307
+ if cached:
308
+ cached_results.append(cached)
309
+
310
+ if len(cached_results) == len(self.models):
311
+ self.logger.info("Resultado completo encontrado no cache")
312
+ return self._combine_predictions(cached_results, start_time)
313
+
314
+ # Executar predições em paralelo
315
+ tasks = []
316
+ for model in self.models:
317
+ if use_cache:
318
+ cached = self.cache.get(text, model.name)
319
+ if cached:
320
+ # Criar task que retorna resultado do cache
321
+ tasks.append(asyncio.create_task(self._return_cached(cached)))
322
+ continue
323
+
324
+ tasks.append(asyncio.create_task(model.predict(text)))
325
+
326
+ # Aguardar todas as predições
327
+ predictions = await asyncio.gather(*tasks, return_exceptions=True)
328
+
329
+ # Filtrar exceções e armazenar no cache
330
+ valid_predictions = []
331
+ for i, pred in enumerate(predictions):
332
+ if isinstance(pred, Exception):
333
+ self.logger.error(f"Erro na predição do modelo {self.models[i].name}: {pred}")
334
+ continue
335
+
336
+ valid_predictions.append(pred)
337
+
338
+ # Armazenar no cache
339
+ if use_cache:
340
+ self.cache.set(text, pred.model_name, pred)
341
+
342
+ if not valid_predictions:
343
+ self.logger.error("Nenhuma predição válida obtida")
344
+ return self._fallback_result(text, start_time)
345
+
346
+ return self._combine_predictions(valid_predictions, start_time)
347
+
348
+ async def _return_cached(self, cached_result: ModelPrediction) -> ModelPrediction:
349
+ """Retorna resultado do cache de forma assíncrona"""
350
+ return cached_result
351
+
352
+ def _combine_predictions(self, predictions: List[ModelPrediction], start_time: datetime) -> EnsembleResult:
353
+ """Combina predições usando votação ponderada"""
354
+ if not predictions:
355
+ return self._fallback_result("", start_time)
356
+
357
+ # Calcular pesos baseados na performance histórica
358
+ model_weights = self._calculate_dynamic_weights(predictions)
359
+
360
+ # Votação ponderada para sentimento
361
+ sentiment_scores = []
362
+ confidences = []
363
+
364
+ for pred in predictions:
365
+ weight = model_weights.get(pred.model_name, 1.0)
366
+ sentiment_scores.append(pred.sentiment_score * weight * pred.confidence)
367
+ confidences.append(pred.confidence * weight)
368
+
369
+ # Calcular resultado final
370
+ weighted_sentiment = sum(sentiment_scores) / sum(confidences) if confidences else 0.0
371
+ final_confidence = np.mean(confidences) if confidences else 0.5
372
+
373
+ # Determinar predição final
374
+ if weighted_sentiment > 0.1:
375
+ final_prediction = "POSITIVO"
376
+ elif weighted_sentiment < -0.1:
377
+ final_prediction = "NEGATIVO"
378
+ else:
379
+ final_prediction = "NEUTRO"
380
+
381
+ # Calcular força do consenso
382
+ consensus_strength = self._calculate_consensus(predictions)
383
+
384
+ processing_time = (datetime.now() - start_time).total_seconds()
385
+
386
+ return EnsembleResult(
387
+ final_prediction=final_prediction,
388
+ confidence=final_confidence,
389
+ sentiment_score=weighted_sentiment,
390
+ individual_predictions=predictions,
391
+ consensus_strength=consensus_strength,
392
+ processing_time=processing_time,
393
+ model_weights=model_weights
394
+ )
395
+
396
+ def _calculate_dynamic_weights(self, predictions: List[ModelPrediction]) -> Dict[str, float]:
397
+ """Calcula pesos dinâmicos baseados na performance histórica"""
398
+ weights = {}
399
+
400
+ for pred in predictions:
401
+ base_weight = next((m.weight for m in self.models if m.name == pred.model_name), 1.0)
402
+
403
+ # Ajustar peso baseado na performance histórica
404
+ historical_performance = self.performance_history.get(pred.model_name, 0.8)
405
+
406
+ # Ajustar peso baseado na confiança atual
407
+ confidence_factor = pred.confidence
408
+
409
+ # Peso final
410
+ final_weight = base_weight * historical_performance * confidence_factor
411
+ weights[pred.model_name] = final_weight
412
+
413
+ return weights
414
+
415
+ def _calculate_consensus(self, predictions: List[ModelPrediction]) -> float:
416
+ """Calcula força do consenso entre modelos"""
417
+ if len(predictions) < 2:
418
+ return 1.0
419
+
420
+ # Contar predições por categoria
421
+ prediction_counts = {}
422
+ for pred in predictions:
423
+ prediction_counts[pred.prediction] = prediction_counts.get(pred.prediction, 0) + 1
424
+
425
+ # Calcular consenso
426
+ max_count = max(prediction_counts.values())
427
+ consensus_strength = max_count / len(predictions)
428
+
429
+ return consensus_strength
430
+
431
+ def _fallback_result(self, text: str, start_time: datetime) -> EnsembleResult:
432
+ """Resultado de fallback quando todos os modelos falham"""
433
+ processing_time = (datetime.now() - start_time).total_seconds()
434
+
435
+ return EnsembleResult(
436
+ final_prediction="NEUTRO",
437
+ confidence=0.3,
438
+ sentiment_score=0.0,
439
+ individual_predictions=[],
440
+ consensus_strength=0.0,
441
+ processing_time=processing_time,
442
+ model_weights={}
443
+ )
444
+
445
+ def update_performance(self, model_name: str, accuracy: float):
446
+ """Atualiza performance histórica de um modelo"""
447
+ if model_name not in self.performance_history:
448
+ self.performance_history[model_name] = accuracy
449
+ else:
450
+ # Média móvel exponencial
451
+ alpha = 0.1
452
+ self.performance_history[model_name] = (
453
+ alpha * accuracy + (1 - alpha) * self.performance_history[model_name]
454
+ )
455
+
456
+ def get_model_stats(self) -> Dict[str, Any]:
457
+ """Retorna estatísticas dos modelos"""
458
+ stats = {
459
+ 'total_models': len(self.models),
460
+ 'loaded_models': sum(1 for m in self.models if m.is_loaded),
461
+ 'cache_size': len(self.cache.cache),
462
+ 'performance_history': self.performance_history.copy(),
463
+ 'model_weights': {m.name: m.weight for m in self.models}
464
+ }
465
+ return stats
466
+
467
+ # Instância global do ensemble
468
+ ensemble_ai = EnsembleAI()
469
+
470
+ # Função de conveniência para uso externo
471
+ async def analyze_market_sentiment(text: str, use_cache: bool = True) -> EnsembleResult:
472
+ """Função principal para análise de sentimento de mercado"""
473
+ return await ensemble_ai.analyze_sentiment(text, use_cache)
474
+
475
+ if __name__ == "__main__":
476
+ # Teste do sistema
477
+ async def test_ensemble():
478
+ test_texts = [
479
+ "O mercado está em alta, com forte tendência de compra",
480
+ "Queda acentuada nos preços, momento de cautela",
481
+ "Mercado lateral, sem direção definida"
482
+ ]
483
+
484
+ for text in test_texts:
485
+ print(f"\nAnalisando: {text}")
486
+ result = await analyze_market_sentiment(text)
487
+ print(f"Resultado: {result.final_prediction}")
488
+ print(f"Confiança: {result.confidence:.2f}")
489
+ print(f"Score: {result.sentiment_score:.2f}")
490
+ print(f"Consenso: {result.consensus_strength:.2f}")
491
+ print(f"Tempo: {result.processing_time:.3f}s")
492
+
493
+ asyncio.run(test_ensemble())
src/ai/performance_optimizer.py ADDED
@@ -0,0 +1,632 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Sistema de Otimização de Performance para Ensemble AI
5
+ Implementa cache inteligente, processamento paralelo e otimizações avançadas
6
+ """
7
+
8
+ import asyncio
9
+ import hashlib
10
+ import json
11
+ import logging
12
+ import time
13
+ from concurrent.futures import ThreadPoolExecutor, as_completed
14
+ from dataclasses import dataclass, asdict
15
+ from datetime import datetime, timedelta
16
+ from typing import Dict, List, Optional, Any, Callable, Tuple
17
+ from collections import defaultdict, deque
18
+ import threading
19
+ import weakref
20
+ import pickle
21
+ import os
22
+ from pathlib import Path
23
+
24
+ @dataclass
25
+ class CacheEntry:
26
+ """Entrada do cache com metadados"""
27
+ key: str
28
+ value: Any
29
+ timestamp: datetime
30
+ access_count: int
31
+ last_access: datetime
32
+ ttl: Optional[timedelta]
33
+ size_bytes: int
34
+ hit_count: int = 0
35
+
36
+ @dataclass
37
+ class PerformanceMetrics:
38
+ """Métricas de performance do sistema"""
39
+ cache_hits: int = 0
40
+ cache_misses: int = 0
41
+ total_requests: int = 0
42
+ avg_response_time: float = 0.0
43
+ parallel_executions: int = 0
44
+ memory_usage_mb: float = 0.0
45
+ cpu_usage_percent: float = 0.0
46
+ active_threads: int = 0
47
+ queue_size: int = 0
48
+
49
+ class IntelligentCache:
50
+ """Cache inteligente com estratégias adaptativas"""
51
+
52
+ def __init__(self, max_size: int = 1000, default_ttl: timedelta = timedelta(hours=1)):
53
+ self.max_size = max_size
54
+ self.default_ttl = default_ttl
55
+ self.cache: Dict[str, CacheEntry] = {}
56
+ self.access_order = deque() # Para LRU
57
+ self.size_tracker = 0
58
+ self.lock = threading.RLock()
59
+
60
+ # Estatísticas
61
+ self.hits = 0
62
+ self.misses = 0
63
+ self.evictions = 0
64
+
65
+ # Cache persistente
66
+ self.persistent_cache_dir = Path("cache/ai_cache")
67
+ self.persistent_cache_dir.mkdir(parents=True, exist_ok=True)
68
+ self.enable_persistence = True
69
+
70
+ # Estratégias de eviction
71
+ self.eviction_strategies = {
72
+ 'lru': self._evict_lru,
73
+ 'lfu': self._evict_lfu,
74
+ 'ttl': self._evict_expired,
75
+ 'size': self._evict_largest,
76
+ 'adaptive': self._evict_adaptive
77
+ }
78
+ self.current_strategy = 'adaptive'
79
+
80
+ def get(self, key: str) -> Optional[Any]:
81
+ """Recupera item do cache"""
82
+ with self.lock:
83
+ cache_key = self._generate_key(key)
84
+
85
+ if cache_key in self.cache:
86
+ entry = self.cache[cache_key]
87
+
88
+ # Verificar TTL
89
+ if self._is_expired(entry):
90
+ self._remove_entry(cache_key)
91
+ self.misses += 1
92
+ return None
93
+
94
+ # Atualizar estatísticas de acesso
95
+ entry.access_count += 1
96
+ entry.hit_count += 1
97
+ entry.last_access = datetime.now()
98
+
99
+ # Atualizar ordem LRU
100
+ if cache_key in self.access_order:
101
+ self.access_order.remove(cache_key)
102
+ self.access_order.append(cache_key)
103
+
104
+ self.hits += 1
105
+ return entry.value
106
+
107
+ self.misses += 1
108
+
109
+ # Tentar cache persistente
110
+ if self.enable_persistence:
111
+ persistent_value = self._load_from_persistent_cache(cache_key)
112
+ if persistent_value is not None:
113
+ # Recarregar no cache em memória
114
+ self.put(key, persistent_value)
115
+ return persistent_value
116
+
117
+ return None
118
+
119
+ def put(self, key: str, value: Any, ttl: Optional[timedelta] = None) -> None:
120
+ """Armazena item no cache"""
121
+ with self.lock:
122
+ cache_key = self._generate_key(key)
123
+
124
+ # Calcular tamanho
125
+ try:
126
+ size_bytes = len(pickle.dumps(value))
127
+ except:
128
+ size_bytes = 1024 # Estimativa padrão
129
+
130
+ # Verificar se precisa fazer eviction
131
+ while len(self.cache) >= self.max_size or self.size_tracker + size_bytes > self.max_size * 10000:
132
+ if not self._evict_one():
133
+ break # Não conseguiu fazer eviction
134
+
135
+ # Criar entrada
136
+ entry = CacheEntry(
137
+ key=cache_key,
138
+ value=value,
139
+ timestamp=datetime.now(),
140
+ access_count=1,
141
+ last_access=datetime.now(),
142
+ ttl=ttl or self.default_ttl,
143
+ size_bytes=size_bytes
144
+ )
145
+
146
+ # Remover entrada existente se houver
147
+ if cache_key in self.cache:
148
+ old_entry = self.cache[cache_key]
149
+ self.size_tracker -= old_entry.size_bytes
150
+
151
+ # Adicionar nova entrada
152
+ self.cache[cache_key] = entry
153
+ self.size_tracker += size_bytes
154
+
155
+ # Atualizar ordem LRU
156
+ if cache_key in self.access_order:
157
+ self.access_order.remove(cache_key)
158
+ self.access_order.append(cache_key)
159
+
160
+ # Salvar no cache persistente
161
+ if self.enable_persistence:
162
+ self._save_to_persistent_cache(cache_key, value)
163
+
164
+ def _generate_key(self, key: str) -> str:
165
+ """Gera chave hash para o cache"""
166
+ return hashlib.md5(key.encode()).hexdigest()
167
+
168
+ def _is_expired(self, entry: CacheEntry) -> bool:
169
+ """Verifica se entrada expirou"""
170
+ if entry.ttl is None:
171
+ return False
172
+ return datetime.now() - entry.timestamp > entry.ttl
173
+
174
+ def _evict_one(self) -> bool:
175
+ """Remove uma entrada usando estratégia atual"""
176
+ strategy_func = self.eviction_strategies.get(self.current_strategy, self._evict_lru)
177
+ return strategy_func()
178
+
179
+ def _evict_lru(self) -> bool:
180
+ """Remove entrada menos recentemente usada"""
181
+ if not self.access_order:
182
+ return False
183
+
184
+ key_to_remove = self.access_order.popleft()
185
+ self._remove_entry(key_to_remove)
186
+ return True
187
+
188
+ def _evict_lfu(self) -> bool:
189
+ """Remove entrada menos frequentemente usada"""
190
+ if not self.cache:
191
+ return False
192
+
193
+ # Encontrar entrada com menor access_count
194
+ min_access_key = min(self.cache.keys(), key=lambda k: self.cache[k].access_count)
195
+ self._remove_entry(min_access_key)
196
+ return True
197
+
198
+ def _evict_expired(self) -> bool:
199
+ """Remove entradas expiradas"""
200
+ expired_keys = [k for k, v in self.cache.items() if self._is_expired(v)]
201
+
202
+ if not expired_keys:
203
+ return False
204
+
205
+ for key in expired_keys:
206
+ self._remove_entry(key)
207
+
208
+ return True
209
+
210
+ def _evict_largest(self) -> bool:
211
+ """Remove entrada com maior tamanho"""
212
+ if not self.cache:
213
+ return False
214
+
215
+ largest_key = max(self.cache.keys(), key=lambda k: self.cache[k].size_bytes)
216
+ self._remove_entry(largest_key)
217
+ return True
218
+
219
+ def _evict_adaptive(self) -> bool:
220
+ """Estratégia adaptativa de eviction"""
221
+ # Primeiro tentar remover expirados
222
+ if self._evict_expired():
223
+ return True
224
+
225
+ # Se cache está muito cheio, remover os maiores
226
+ if len(self.cache) > self.max_size * 0.9:
227
+ return self._evict_largest()
228
+
229
+ # Caso contrário, usar LRU
230
+ return self._evict_lru()
231
+
232
+ def _remove_entry(self, key: str) -> None:
233
+ """Remove entrada do cache"""
234
+ if key in self.cache:
235
+ entry = self.cache[key]
236
+ self.size_tracker -= entry.size_bytes
237
+ del self.cache[key]
238
+ self.evictions += 1
239
+
240
+ if key in self.access_order:
241
+ self.access_order.remove(key)
242
+
243
+ def _save_to_persistent_cache(self, key: str, value: Any) -> None:
244
+ """Salva no cache persistente"""
245
+ try:
246
+ cache_file = self.persistent_cache_dir / f"{key}.pkl"
247
+ with open(cache_file, 'wb') as f:
248
+ pickle.dump({
249
+ 'value': value,
250
+ 'timestamp': datetime.now(),
251
+ 'key': key
252
+ }, f)
253
+ except Exception as e:
254
+ logging.warning(f"Erro ao salvar cache persistente: {e}")
255
+
256
+ def _load_from_persistent_cache(self, key: str) -> Optional[Any]:
257
+ """Carrega do cache persistente"""
258
+ try:
259
+ cache_file = self.persistent_cache_dir / f"{key}.pkl"
260
+ if cache_file.exists():
261
+ with open(cache_file, 'rb') as f:
262
+ data = pickle.load(f)
263
+
264
+ # Verificar se não expirou (24 horas)
265
+ if datetime.now() - data['timestamp'] < timedelta(hours=24):
266
+ return data['value']
267
+ else:
268
+ # Remover arquivo expirado
269
+ cache_file.unlink()
270
+ except Exception as e:
271
+ logging.warning(f"Erro ao carregar cache persistente: {e}")
272
+
273
+ return None
274
+
275
+ def get_stats(self) -> Dict[str, Any]:
276
+ """Retorna estatísticas do cache"""
277
+ with self.lock:
278
+ total_requests = self.hits + self.misses
279
+ hit_rate = (self.hits / total_requests * 100) if total_requests > 0 else 0
280
+
281
+ return {
282
+ 'hits': self.hits,
283
+ 'misses': self.misses,
284
+ 'hit_rate': hit_rate,
285
+ 'evictions': self.evictions,
286
+ 'current_size': len(self.cache),
287
+ 'max_size': self.max_size,
288
+ 'memory_usage_bytes': self.size_tracker,
289
+ 'strategy': self.current_strategy
290
+ }
291
+
292
+ def clear(self) -> None:
293
+ """Limpa o cache"""
294
+ with self.lock:
295
+ self.cache.clear()
296
+ self.access_order.clear()
297
+ self.size_tracker = 0
298
+
299
+ class ParallelProcessor:
300
+ """Processador paralelo para análises de IA"""
301
+
302
+ def __init__(self, max_workers: int = 4):
303
+ self.max_workers = max_workers
304
+ self.executor = ThreadPoolExecutor(max_workers=max_workers)
305
+ self.active_tasks = set()
306
+ self.task_queue = asyncio.Queue()
307
+ self.metrics = PerformanceMetrics()
308
+ self.lock = threading.Lock()
309
+
310
+ async def process_parallel(self, tasks: List[Callable], timeout: float = 30.0) -> List[Any]:
311
+ """Processa tarefas em paralelo"""
312
+ if not tasks:
313
+ return []
314
+
315
+ start_time = time.time()
316
+
317
+ # Submeter tarefas
318
+ futures = []
319
+ for task in tasks:
320
+ future = self.executor.submit(task)
321
+ futures.append(future)
322
+
323
+ with self.lock:
324
+ self.active_tasks.add(future)
325
+ self.metrics.parallel_executions += 1
326
+
327
+ # Aguardar resultados
328
+ results = []
329
+ try:
330
+ for future in as_completed(futures, timeout=timeout):
331
+ try:
332
+ result = future.result()
333
+ results.append(result)
334
+ except Exception as e:
335
+ logging.error(f"Erro em tarefa paralela: {e}")
336
+ results.append(None)
337
+ finally:
338
+ with self.lock:
339
+ self.active_tasks.discard(future)
340
+
341
+ except TimeoutError:
342
+ logging.warning(f"Timeout em processamento paralelo após {timeout}s")
343
+ # Cancelar tarefas pendentes
344
+ for future in futures:
345
+ future.cancel()
346
+ with self.lock:
347
+ self.active_tasks.discard(future)
348
+
349
+ # Atualizar métricas
350
+ processing_time = time.time() - start_time
351
+ with self.lock:
352
+ self.metrics.avg_response_time = (
353
+ self.metrics.avg_response_time * 0.9 + processing_time * 0.1
354
+ )
355
+ self.metrics.active_threads = len(self.active_tasks)
356
+
357
+ return results
358
+
359
+ def get_metrics(self) -> PerformanceMetrics:
360
+ """Retorna métricas de performance"""
361
+ with self.lock:
362
+ return PerformanceMetrics(
363
+ cache_hits=self.metrics.cache_hits,
364
+ cache_misses=self.metrics.cache_misses,
365
+ total_requests=self.metrics.total_requests,
366
+ avg_response_time=self.metrics.avg_response_time,
367
+ parallel_executions=self.metrics.parallel_executions,
368
+ active_threads=len(self.active_tasks),
369
+ queue_size=self.task_queue.qsize() if hasattr(self.task_queue, 'qsize') else 0
370
+ )
371
+
372
+ def shutdown(self):
373
+ """Encerra o processador"""
374
+ self.executor.shutdown(wait=True)
375
+
376
+ class PerformanceOptimizer:
377
+ """Sistema principal de otimização de performance"""
378
+
379
+ def __init__(self, cache_size: int = 1000, max_workers: int = 4):
380
+ self.cache = IntelligentCache(max_size=cache_size)
381
+ self.parallel_processor = ParallelProcessor(max_workers=max_workers)
382
+ self.metrics_history = deque(maxlen=1000)
383
+ self.optimization_rules = []
384
+ self.logger = logging.getLogger(__name__)
385
+
386
+ # Configurações adaptativas
387
+ self.adaptive_config = {
388
+ 'cache_ttl_base': timedelta(hours=1),
389
+ 'parallel_threshold': 3, # Número mínimo de tarefas para paralelizar
390
+ 'timeout_base': 30.0,
391
+ 'memory_threshold_mb': 500
392
+ }
393
+
394
+ # Inicializar regras de otimização
395
+ self._initialize_optimization_rules()
396
+
397
+ def _initialize_optimization_rules(self):
398
+ """Inicializa regras de otimização adaptativa"""
399
+ self.optimization_rules = [
400
+ self._rule_adjust_cache_ttl,
401
+ self._rule_adjust_parallel_threshold,
402
+ self._rule_memory_management,
403
+ self._rule_timeout_adjustment
404
+ ]
405
+
406
+ async def optimize_analysis(self, analysis_func: Callable,
407
+ text: str,
408
+ use_cache: bool = True,
409
+ force_parallel: bool = False) -> Any:
410
+ """Otimiza execução de análise com cache e paralelização"""
411
+ start_time = time.time()
412
+
413
+ # Gerar chave de cache
414
+ cache_key = f"analysis_{hashlib.md5(text.encode()).hexdigest()}"
415
+
416
+ # Tentar cache primeiro
417
+ if use_cache:
418
+ cached_result = self.cache.get(cache_key)
419
+ if cached_result is not None:
420
+ self.logger.debug(f"Cache hit para análise: {cache_key[:8]}...")
421
+ return cached_result
422
+
423
+ # Executar análise
424
+ try:
425
+ if force_parallel or self._should_use_parallel():
426
+ # Análise paralela (se aplicável)
427
+ result = await self._execute_parallel_analysis(analysis_func, text)
428
+ else:
429
+ # Análise sequencial
430
+ result = await self._execute_sequential_analysis(analysis_func, text)
431
+
432
+ # Armazenar no cache
433
+ if use_cache and result is not None:
434
+ ttl = self._calculate_adaptive_ttl(text, result)
435
+ self.cache.put(cache_key, result, ttl)
436
+
437
+ # Registrar métricas
438
+ processing_time = time.time() - start_time
439
+ self._record_metrics(processing_time, use_cache, cached_result is not None)
440
+
441
+ return result
442
+
443
+ except Exception as e:
444
+ self.logger.error(f"Erro na análise otimizada: {e}")
445
+ raise
446
+
447
+ async def _execute_sequential_analysis(self, analysis_func: Callable, text: str) -> Any:
448
+ """Executa análise sequencial"""
449
+ if asyncio.iscoroutinefunction(analysis_func):
450
+ return await analysis_func(text)
451
+ else:
452
+ return analysis_func(text)
453
+
454
+ async def _execute_parallel_analysis(self, analysis_func: Callable, text: str) -> Any:
455
+ """Executa análise paralela (quando aplicável)"""
456
+ # Para análises que podem ser paralelizadas (ex: múltiplos modelos)
457
+ # Por enquanto, executa sequencialmente
458
+ return await self._execute_sequential_analysis(analysis_func, text)
459
+
460
+ def _should_use_parallel(self) -> bool:
461
+ """Determina se deve usar processamento paralelo"""
462
+ # Lógica para decidir paralelização
463
+ current_load = len(self.parallel_processor.active_tasks)
464
+ return current_load < self.adaptive_config['parallel_threshold']
465
+
466
+ def _calculate_adaptive_ttl(self, text: str, result: Any) -> timedelta:
467
+ """Calcula TTL adaptativo baseado no conteúdo"""
468
+ base_ttl = self.adaptive_config['cache_ttl_base']
469
+
470
+ # Ajustar baseado no tamanho do texto
471
+ text_factor = min(2.0, len(text) / 1000) # Textos maiores = TTL maior
472
+
473
+ # Ajustar baseado na confiança do resultado
474
+ confidence_factor = 1.0
475
+ if hasattr(result, 'confidence'):
476
+ confidence_factor = result.confidence # Alta confiança = TTL maior
477
+
478
+ adjusted_ttl = base_ttl * text_factor * confidence_factor
479
+ return max(timedelta(minutes=5), min(timedelta(hours=6), adjusted_ttl))
480
+
481
+ def _record_metrics(self, processing_time: float, used_cache: bool, cache_hit: bool):
482
+ """Registra métricas de performance"""
483
+ metrics = {
484
+ 'timestamp': datetime.now(),
485
+ 'processing_time': processing_time,
486
+ 'used_cache': used_cache,
487
+ 'cache_hit': cache_hit,
488
+ 'memory_usage': self._get_memory_usage()
489
+ }
490
+
491
+ self.metrics_history.append(metrics)
492
+
493
+ # Aplicar regras de otimização
494
+ self._apply_optimization_rules()
495
+
496
+ def _get_memory_usage(self) -> float:
497
+ """Estima uso de memória em MB"""
498
+ try:
499
+ import psutil
500
+ process = psutil.Process()
501
+ return process.memory_info().rss / 1024 / 1024
502
+ except ImportError:
503
+ return 0.0
504
+
505
+ def _apply_optimization_rules(self):
506
+ """Aplica regras de otimização adaptativa"""
507
+ for rule in self.optimization_rules:
508
+ try:
509
+ rule()
510
+ except Exception as e:
511
+ self.logger.warning(f"Erro ao aplicar regra de otimização: {e}")
512
+
513
+ def _rule_adjust_cache_ttl(self):
514
+ """Regra: Ajustar TTL do cache baseado na taxa de hit"""
515
+ if len(self.metrics_history) < 10:
516
+ return
517
+
518
+ recent_metrics = list(self.metrics_history)[-10:]
519
+ hit_rate = sum(1 for m in recent_metrics if m['cache_hit']) / len(recent_metrics)
520
+
521
+ if hit_rate > 0.8: # Alta taxa de hit - aumentar TTL
522
+ self.adaptive_config['cache_ttl_base'] *= 1.1
523
+ elif hit_rate < 0.3: # Baixa taxa de hit - diminuir TTL
524
+ self.adaptive_config['cache_ttl_base'] *= 0.9
525
+
526
+ # Limitar TTL
527
+ self.adaptive_config['cache_ttl_base'] = max(
528
+ timedelta(minutes=10),
529
+ min(timedelta(hours=4), self.adaptive_config['cache_ttl_base'])
530
+ )
531
+
532
+ def _rule_adjust_parallel_threshold(self):
533
+ """Regra: Ajustar threshold de paralelização"""
534
+ if len(self.metrics_history) < 20:
535
+ return
536
+
537
+ recent_metrics = list(self.metrics_history)[-20:]
538
+ avg_processing_time = sum(m['processing_time'] for m in recent_metrics) / len(recent_metrics)
539
+
540
+ if avg_processing_time > 5.0: # Processamento lento - mais paralelização
541
+ self.adaptive_config['parallel_threshold'] = max(1, self.adaptive_config['parallel_threshold'] - 1)
542
+ elif avg_processing_time < 1.0: # Processamento rápido - menos paralelização
543
+ self.adaptive_config['parallel_threshold'] = min(8, self.adaptive_config['parallel_threshold'] + 1)
544
+
545
+ def _rule_memory_management(self):
546
+ """Regra: Gerenciar memória"""
547
+ current_memory = self._get_memory_usage()
548
+
549
+ if current_memory > self.adaptive_config['memory_threshold_mb']:
550
+ # Limpar cache parcialmente
551
+ self.cache.clear()
552
+ self.logger.info(f"Cache limpo devido ao uso de memória: {current_memory:.1f}MB")
553
+
554
+ def _rule_timeout_adjustment(self):
555
+ """Regra: Ajustar timeouts"""
556
+ if len(self.metrics_history) < 15:
557
+ return
558
+
559
+ recent_metrics = list(self.metrics_history)[-15:]
560
+ avg_time = sum(m['processing_time'] for m in recent_metrics) / len(recent_metrics)
561
+
562
+ # Ajustar timeout baseado no tempo médio
563
+ self.adaptive_config['timeout_base'] = max(10.0, min(60.0, avg_time * 3))
564
+
565
+ def get_performance_report(self) -> Dict[str, Any]:
566
+ """Gera relatório completo de performance"""
567
+ cache_stats = self.cache.get_stats()
568
+ processor_metrics = self.parallel_processor.get_metrics()
569
+
570
+ # Estatísticas históricas
571
+ if self.metrics_history:
572
+ recent_metrics = list(self.metrics_history)[-50:]
573
+ avg_processing_time = sum(m['processing_time'] for m in recent_metrics) / len(recent_metrics)
574
+ cache_hit_rate = sum(1 for m in recent_metrics if m['cache_hit']) / len(recent_metrics) * 100
575
+ else:
576
+ avg_processing_time = 0.0
577
+ cache_hit_rate = 0.0
578
+
579
+ return {
580
+ 'cache': cache_stats,
581
+ 'parallel_processing': asdict(processor_metrics),
582
+ 'adaptive_config': {
583
+ k: str(v) if isinstance(v, timedelta) else v
584
+ for k, v in self.adaptive_config.items()
585
+ },
586
+ 'performance_summary': {
587
+ 'avg_processing_time': avg_processing_time,
588
+ 'cache_hit_rate': cache_hit_rate,
589
+ 'total_analyses': len(self.metrics_history),
590
+ 'memory_usage_mb': self._get_memory_usage()
591
+ }
592
+ }
593
+
594
+ def cleanup(self):
595
+ """Limpeza de recursos"""
596
+ self.parallel_processor.shutdown()
597
+ self.cache.clear()
598
+
599
+ # Instância global do otimizador
600
+ performance_optimizer = PerformanceOptimizer()
601
+
602
+ # Função de conveniência
603
+ async def optimize_ai_analysis(analysis_func: Callable, text: str, use_cache: bool = True) -> Any:
604
+ """Função principal para análise otimizada"""
605
+ return await performance_optimizer.optimize_analysis(analysis_func, text, use_cache)
606
+
607
+ if __name__ == "__main__":
608
+ # Teste do sistema de otimização
609
+ async def test_analysis(text: str):
610
+ await asyncio.sleep(0.1) # Simular processamento
611
+ return {'result': f'Análise de: {text[:20]}...', 'confidence': 0.8}
612
+
613
+ async def test_optimizer():
614
+ print("Testando sistema de otimização...")
615
+
616
+ # Teste de cache
617
+ result1 = await optimize_ai_analysis(test_analysis, "Texto de teste para análise")
618
+ result2 = await optimize_ai_analysis(test_analysis, "Texto de teste para análise") # Deve usar cache
619
+
620
+ print(f"Resultado 1: {result1}")
621
+ print(f"Resultado 2: {result2}")
622
+
623
+ # Relatório de performance
624
+ report = performance_optimizer.get_performance_report()
625
+ print(f"\nRelatório de Performance:")
626
+ print(f"Cache Hit Rate: {report['performance_summary']['cache_hit_rate']:.1f}%")
627
+ print(f"Tempo Médio: {report['performance_summary']['avg_processing_time']:.3f}s")
628
+
629
+ performance_optimizer.cleanup()
630
+
631
+ # Executar teste
632
+ asyncio.run(test_optimizer())
src/ai/voting_system.py ADDED
@@ -0,0 +1,576 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Sistema de Votação Inteligente para Ensemble AI
5
+ Otimiza decisões através de algoritmos avançados de consenso
6
+ """
7
+
8
+ import numpy as np
9
+ import logging
10
+ from typing import Dict, List, Optional, Tuple, Any
11
+ from dataclasses import dataclass
12
+ from enum import Enum
13
+ from datetime import datetime, timedelta
14
+ import json
15
+ from collections import defaultdict, deque
16
+ import statistics
17
+
18
+ class VotingStrategy(Enum):
19
+ """Estratégias de votação disponíveis"""
20
+ SIMPLE_MAJORITY = "simple_majority"
21
+ WEIGHTED_AVERAGE = "weighted_average"
22
+ CONFIDENCE_WEIGHTED = "confidence_weighted"
23
+ DYNAMIC_CONSENSUS = "dynamic_consensus"
24
+ BAYESIAN_FUSION = "bayesian_fusion"
25
+ ADAPTIVE_ENSEMBLE = "adaptive_ensemble"
26
+
27
+ @dataclass
28
+ class VoteResult:
29
+ """Resultado de uma votação"""
30
+ decision: str
31
+ confidence: float
32
+ consensus_strength: float
33
+ strategy_used: VotingStrategy
34
+ individual_votes: List[Dict[str, Any]]
35
+ metadata: Dict[str, Any]
36
+ processing_time: float
37
+
38
+ @dataclass
39
+ class ModelPerformance:
40
+ """Métricas de performance de um modelo"""
41
+ accuracy_history: deque
42
+ recent_accuracy: float
43
+ long_term_accuracy: float
44
+ consistency_score: float
45
+ response_time_avg: float
46
+ last_updated: datetime
47
+
48
+ class AdaptiveWeightCalculator:
49
+ """Calculadora de pesos adaptativos para modelos"""
50
+
51
+ def __init__(self, window_size: int = 100):
52
+ self.window_size = window_size
53
+ self.performance_tracker = defaultdict(lambda: ModelPerformance(
54
+ accuracy_history=deque(maxlen=window_size),
55
+ recent_accuracy=0.5,
56
+ long_term_accuracy=0.5,
57
+ consistency_score=0.5,
58
+ response_time_avg=1.0,
59
+ last_updated=datetime.now()
60
+ ))
61
+ self.market_conditions = {
62
+ 'volatility': 0.5,
63
+ 'trend_strength': 0.5,
64
+ 'volume_profile': 0.5
65
+ }
66
+
67
+ def update_performance(self, model_name: str, accuracy: float, response_time: float):
68
+ """Atualiza métricas de performance de um modelo"""
69
+ perf = self.performance_tracker[model_name]
70
+
71
+ # Adicionar nova accuracy
72
+ perf.accuracy_history.append(accuracy)
73
+
74
+ # Calcular métricas
75
+ if len(perf.accuracy_history) >= 10:
76
+ perf.recent_accuracy = np.mean(list(perf.accuracy_history)[-10:])
77
+ else:
78
+ perf.recent_accuracy = np.mean(list(perf.accuracy_history))
79
+
80
+ perf.long_term_accuracy = np.mean(list(perf.accuracy_history))
81
+
82
+ # Calcular consistência (inverso do desvio padrão)
83
+ if len(perf.accuracy_history) >= 5:
84
+ std_dev = np.std(list(perf.accuracy_history))
85
+ perf.consistency_score = max(0.1, 1.0 - std_dev)
86
+
87
+ # Atualizar tempo de resposta médio
88
+ alpha = 0.1
89
+ perf.response_time_avg = alpha * response_time + (1 - alpha) * perf.response_time_avg
90
+
91
+ perf.last_updated = datetime.now()
92
+
93
+ def calculate_adaptive_weights(self, model_names: List[str],
94
+ market_context: Optional[Dict[str, float]] = None) -> Dict[str, float]:
95
+ """Calcula pesos adaptativos baseados em performance e contexto"""
96
+ weights = {}
97
+
98
+ # Atualizar condições de mercado se fornecidas
99
+ if market_context:
100
+ self.market_conditions.update(market_context)
101
+
102
+ for model_name in model_names:
103
+ perf = self.performance_tracker[model_name]
104
+
105
+ # Peso base da accuracy recente
106
+ accuracy_weight = perf.recent_accuracy
107
+
108
+ # Ajuste por consistência
109
+ consistency_factor = perf.consistency_score
110
+
111
+ # Ajuste por tempo de resposta (modelos mais rápidos têm vantagem)
112
+ speed_factor = min(2.0, 2.0 / max(0.1, perf.response_time_avg))
113
+
114
+ # Ajuste por condições de mercado
115
+ market_factor = self._calculate_market_adjustment(model_name)
116
+
117
+ # Peso final
118
+ final_weight = accuracy_weight * consistency_factor * speed_factor * market_factor
119
+ weights[model_name] = max(0.1, min(2.0, final_weight)) # Limitar entre 0.1 e 2.0
120
+
121
+ # Normalizar pesos
122
+ total_weight = sum(weights.values())
123
+ if total_weight > 0:
124
+ weights = {k: v / total_weight for k, v in weights.items()}
125
+
126
+ return weights
127
+
128
+ def _calculate_market_adjustment(self, model_name: str) -> float:
129
+ """Calcula ajuste baseado nas condições de mercado"""
130
+ # Diferentes modelos podem ter performance melhor em diferentes condições
131
+ model_preferences = {
132
+ 'FinBERT': {
133
+ 'high_volatility': 1.2,
134
+ 'strong_trend': 1.1,
135
+ 'high_volume': 1.0
136
+ },
137
+ 'DistilBERT-Financial': {
138
+ 'high_volatility': 1.0,
139
+ 'strong_trend': 1.2,
140
+ 'high_volume': 1.1
141
+ },
142
+ 'RoBERTa-Sentiment': {
143
+ 'high_volatility': 0.9,
144
+ 'strong_trend': 1.0,
145
+ 'high_volume': 1.2
146
+ },
147
+ 'BERT-Base': {
148
+ 'high_volatility': 1.0,
149
+ 'strong_trend': 1.0,
150
+ 'high_volume': 1.0
151
+ }
152
+ }
153
+
154
+ preferences = model_preferences.get(model_name, {
155
+ 'high_volatility': 1.0,
156
+ 'strong_trend': 1.0,
157
+ 'high_volume': 1.0
158
+ })
159
+
160
+ # Calcular fator de ajuste
161
+ volatility_factor = preferences['high_volatility'] if self.market_conditions['volatility'] > 0.7 else 1.0
162
+ trend_factor = preferences['strong_trend'] if self.market_conditions['trend_strength'] > 0.7 else 1.0
163
+ volume_factor = preferences['high_volume'] if self.market_conditions['volume_profile'] > 0.7 else 1.0
164
+
165
+ return (volatility_factor + trend_factor + volume_factor) / 3.0
166
+
167
+ class IntelligentVotingSystem:
168
+ """Sistema de votação inteligente com múltiplas estratégias"""
169
+
170
+ def __init__(self):
171
+ self.weight_calculator = AdaptiveWeightCalculator()
172
+ self.voting_history = deque(maxlen=1000)
173
+ self.strategy_performance = defaultdict(lambda: deque(maxlen=100))
174
+ self.logger = logging.getLogger(__name__)
175
+
176
+ # Configurações de estratégias
177
+ self.strategy_configs = {
178
+ VotingStrategy.SIMPLE_MAJORITY: {'threshold': 0.5},
179
+ VotingStrategy.WEIGHTED_AVERAGE: {'min_confidence': 0.3},
180
+ VotingStrategy.CONFIDENCE_WEIGHTED: {'confidence_power': 2.0},
181
+ VotingStrategy.DYNAMIC_CONSENSUS: {'consensus_threshold': 0.7},
182
+ VotingStrategy.BAYESIAN_FUSION: {'prior_strength': 0.1},
183
+ VotingStrategy.ADAPTIVE_ENSEMBLE: {'adaptation_rate': 0.1}
184
+ }
185
+
186
+ def vote(self, predictions: List[Dict[str, Any]],
187
+ strategy: VotingStrategy = VotingStrategy.ADAPTIVE_ENSEMBLE,
188
+ market_context: Optional[Dict[str, float]] = None) -> VoteResult:
189
+ """Executa votação usando estratégia especificada"""
190
+ start_time = datetime.now()
191
+
192
+ if not predictions:
193
+ return self._empty_vote_result(strategy, start_time)
194
+
195
+ # Selecionar estratégia automaticamente se for ADAPTIVE_ENSEMBLE
196
+ if strategy == VotingStrategy.ADAPTIVE_ENSEMBLE:
197
+ strategy = self._select_best_strategy(predictions, market_context)
198
+
199
+ # Executar votação
200
+ result = self._execute_voting_strategy(predictions, strategy, market_context)
201
+
202
+ # Calcular tempo de processamento
203
+ processing_time = (datetime.now() - start_time).total_seconds()
204
+ result.processing_time = processing_time
205
+
206
+ # Armazenar no histórico
207
+ self.voting_history.append({
208
+ 'timestamp': datetime.now(),
209
+ 'strategy': strategy,
210
+ 'result': result,
211
+ 'num_predictions': len(predictions)
212
+ })
213
+
214
+ return result
215
+
216
+ def _select_best_strategy(self, predictions: List[Dict[str, Any]],
217
+ market_context: Optional[Dict[str, float]]) -> VotingStrategy:
218
+ """Seleciona a melhor estratégia baseada no contexto"""
219
+ # Analisar características das predições
220
+ confidences = [p.get('confidence', 0.5) for p in predictions]
221
+ avg_confidence = np.mean(confidences)
222
+ confidence_variance = np.var(confidences)
223
+
224
+ # Analisar consenso
225
+ predictions_count = defaultdict(int)
226
+ for p in predictions:
227
+ predictions_count[p.get('prediction', 'NEUTRO')] += 1
228
+
229
+ max_agreement = max(predictions_count.values()) / len(predictions)
230
+
231
+ # Selecionar estratégia baseada nas características
232
+ if max_agreement > 0.8: # Alto consenso
233
+ return VotingStrategy.SIMPLE_MAJORITY
234
+ elif avg_confidence > 0.8: # Alta confiança
235
+ return VotingStrategy.CONFIDENCE_WEIGHTED
236
+ elif confidence_variance > 0.1: # Alta variância na confiança
237
+ return VotingStrategy.WEIGHTED_AVERAGE
238
+ elif len(predictions) >= 4: # Muitos modelos
239
+ return VotingStrategy.BAYESIAN_FUSION
240
+ else:
241
+ return VotingStrategy.DYNAMIC_CONSENSUS
242
+
243
+ def _execute_voting_strategy(self, predictions: List[Dict[str, Any]],
244
+ strategy: VotingStrategy,
245
+ market_context: Optional[Dict[str, float]]) -> VoteResult:
246
+ """Executa a estratégia de votação especificada"""
247
+
248
+ if strategy == VotingStrategy.SIMPLE_MAJORITY:
249
+ return self._simple_majority_vote(predictions)
250
+ elif strategy == VotingStrategy.WEIGHTED_AVERAGE:
251
+ return self._weighted_average_vote(predictions, market_context)
252
+ elif strategy == VotingStrategy.CONFIDENCE_WEIGHTED:
253
+ return self._confidence_weighted_vote(predictions)
254
+ elif strategy == VotingStrategy.DYNAMIC_CONSENSUS:
255
+ return self._dynamic_consensus_vote(predictions)
256
+ elif strategy == VotingStrategy.BAYESIAN_FUSION:
257
+ return self._bayesian_fusion_vote(predictions)
258
+ else:
259
+ # Fallback para weighted average
260
+ return self._weighted_average_vote(predictions, market_context)
261
+
262
+ def _simple_majority_vote(self, predictions: List[Dict[str, Any]]) -> VoteResult:
263
+ """Votação por maioria simples"""
264
+ vote_counts = defaultdict(int)
265
+
266
+ for pred in predictions:
267
+ vote_counts[pred.get('prediction', 'NEUTRO')] += 1
268
+
269
+ # Encontrar vencedor
270
+ winner = max(vote_counts.keys(), key=lambda k: vote_counts[k])
271
+ max_votes = vote_counts[winner]
272
+
273
+ # Calcular confiança e consenso
274
+ confidence = max_votes / len(predictions)
275
+ consensus_strength = confidence
276
+
277
+ return VoteResult(
278
+ decision=winner,
279
+ confidence=confidence,
280
+ consensus_strength=consensus_strength,
281
+ strategy_used=VotingStrategy.SIMPLE_MAJORITY,
282
+ individual_votes=[{'prediction': p.get('prediction'), 'confidence': p.get('confidence')} for p in predictions],
283
+ metadata={'vote_counts': dict(vote_counts)},
284
+ processing_time=0.0
285
+ )
286
+
287
+ def _weighted_average_vote(self, predictions: List[Dict[str, Any]],
288
+ market_context: Optional[Dict[str, float]]) -> VoteResult:
289
+ """Votação por média ponderada"""
290
+ model_names = [p.get('model_name', f'model_{i}') for i, p in enumerate(predictions)]
291
+ weights = self.weight_calculator.calculate_adaptive_weights(model_names, market_context)
292
+
293
+ # Calcular scores ponderados
294
+ sentiment_scores = []
295
+ total_weight = 0
296
+
297
+ for i, pred in enumerate(predictions):
298
+ model_name = model_names[i]
299
+ weight = weights.get(model_name, 1.0)
300
+ confidence = pred.get('confidence', 0.5)
301
+ sentiment_score = pred.get('sentiment_score', 0.0)
302
+
303
+ weighted_score = sentiment_score * weight * confidence
304
+ sentiment_scores.append(weighted_score)
305
+ total_weight += weight * confidence
306
+
307
+ # Calcular resultado final
308
+ if total_weight > 0:
309
+ final_sentiment = sum(sentiment_scores) / total_weight
310
+ else:
311
+ final_sentiment = 0.0
312
+
313
+ # Determinar decisão
314
+ if final_sentiment > 0.1:
315
+ decision = "POSITIVO"
316
+ elif final_sentiment < -0.1:
317
+ decision = "NEGATIVO"
318
+ else:
319
+ decision = "NEUTRO"
320
+
321
+ # Calcular confiança média ponderada
322
+ weighted_confidences = [p.get('confidence', 0.5) * weights.get(model_names[i], 1.0)
323
+ for i, p in enumerate(predictions)]
324
+ confidence = sum(weighted_confidences) / sum(weights.values()) if weights else 0.5
325
+
326
+ # Calcular consenso
327
+ consensus_strength = self._calculate_consensus_strength(predictions)
328
+
329
+ return VoteResult(
330
+ decision=decision,
331
+ confidence=confidence,
332
+ consensus_strength=consensus_strength,
333
+ strategy_used=VotingStrategy.WEIGHTED_AVERAGE,
334
+ individual_votes=[{'prediction': p.get('prediction'), 'confidence': p.get('confidence'),
335
+ 'weight': weights.get(model_names[i], 1.0)} for i, p in enumerate(predictions)],
336
+ metadata={'final_sentiment': final_sentiment, 'weights': weights},
337
+ processing_time=0.0
338
+ )
339
+
340
+ def _confidence_weighted_vote(self, predictions: List[Dict[str, Any]]) -> VoteResult:
341
+ """Votação ponderada pela confiança"""
342
+ power = self.strategy_configs[VotingStrategy.CONFIDENCE_WEIGHTED]['confidence_power']
343
+
344
+ # Calcular pesos baseados na confiança
345
+ weighted_votes = defaultdict(float)
346
+ total_weight = 0
347
+
348
+ for pred in predictions:
349
+ confidence = pred.get('confidence', 0.5)
350
+ prediction = pred.get('prediction', 'NEUTRO')
351
+ weight = confidence ** power
352
+
353
+ weighted_votes[prediction] += weight
354
+ total_weight += weight
355
+
356
+ # Normalizar
357
+ if total_weight > 0:
358
+ weighted_votes = {k: v / total_weight for k, v in weighted_votes.items()}
359
+
360
+ # Encontrar vencedor
361
+ winner = max(weighted_votes.keys(), key=lambda k: weighted_votes[k])
362
+ confidence = weighted_votes[winner]
363
+
364
+ # Calcular consenso
365
+ consensus_strength = confidence
366
+
367
+ return VoteResult(
368
+ decision=winner,
369
+ confidence=confidence,
370
+ consensus_strength=consensus_strength,
371
+ strategy_used=VotingStrategy.CONFIDENCE_WEIGHTED,
372
+ individual_votes=[{'prediction': p.get('prediction'), 'confidence': p.get('confidence')} for p in predictions],
373
+ metadata={'weighted_votes': dict(weighted_votes)},
374
+ processing_time=0.0
375
+ )
376
+
377
+ def _dynamic_consensus_vote(self, predictions: List[Dict[str, Any]]) -> VoteResult:
378
+ """Votação por consenso dinâmico"""
379
+ threshold = self.strategy_configs[VotingStrategy.DYNAMIC_CONSENSUS]['consensus_threshold']
380
+
381
+ # Agrupar por predição
382
+ groups = defaultdict(list)
383
+ for pred in predictions:
384
+ groups[pred.get('prediction', 'NEUTRO')].append(pred)
385
+
386
+ # Encontrar grupo com maior consenso
387
+ best_group = None
388
+ best_consensus = 0
389
+
390
+ for prediction, group in groups.items():
391
+ # Calcular consenso do grupo
392
+ confidences = [p.get('confidence', 0.5) for p in group]
393
+ group_size_factor = len(group) / len(predictions)
394
+ avg_confidence = np.mean(confidences)
395
+ consensus = group_size_factor * avg_confidence
396
+
397
+ if consensus > best_consensus:
398
+ best_consensus = consensus
399
+ best_group = (prediction, group)
400
+
401
+ if best_group and best_consensus >= threshold:
402
+ decision = best_group[0]
403
+ confidence = best_consensus
404
+ else:
405
+ # Fallback para neutro se não há consenso suficiente
406
+ decision = "NEUTRO"
407
+ confidence = 0.5
408
+
409
+ return VoteResult(
410
+ decision=decision,
411
+ confidence=confidence,
412
+ consensus_strength=best_consensus,
413
+ strategy_used=VotingStrategy.DYNAMIC_CONSENSUS,
414
+ individual_votes=[{'prediction': p.get('prediction'), 'confidence': p.get('confidence')} for p in predictions],
415
+ metadata={'threshold': threshold, 'groups': {k: len(v) for k, v in groups.items()}},
416
+ processing_time=0.0
417
+ )
418
+
419
+ def _bayesian_fusion_vote(self, predictions: List[Dict[str, Any]]) -> VoteResult:
420
+ """Votação usando fusão Bayesiana"""
421
+ prior_strength = self.strategy_configs[VotingStrategy.BAYESIAN_FUSION]['prior_strength']
422
+
423
+ # Prior uniforme
424
+ classes = ['POSITIVO', 'NEUTRO', 'NEGATIVO']
425
+ prior = {cls: 1.0/len(classes) for cls in classes}
426
+
427
+ # Calcular likelihood para cada classe
428
+ posteriors = prior.copy()
429
+
430
+ for pred in predictions:
431
+ prediction = pred.get('prediction', 'NEUTRO')
432
+ confidence = pred.get('confidence', 0.5)
433
+
434
+ # Atualizar posterior
435
+ for cls in classes:
436
+ if cls == prediction:
437
+ likelihood = confidence
438
+ else:
439
+ likelihood = (1 - confidence) / (len(classes) - 1)
440
+
441
+ posteriors[cls] *= (prior_strength * prior[cls] + likelihood)
442
+
443
+ # Normalizar
444
+ total = sum(posteriors.values())
445
+ if total > 0:
446
+ posteriors = {k: v / total for k, v in posteriors.items()}
447
+
448
+ # Encontrar classe com maior probabilidade
449
+ winner = max(posteriors.keys(), key=lambda k: posteriors[k])
450
+ confidence = posteriors[winner]
451
+
452
+ # Calcular consenso baseado na distribuição
453
+ entropy = -sum(p * np.log(p + 1e-10) for p in posteriors.values())
454
+ max_entropy = np.log(len(classes))
455
+ consensus_strength = 1 - (entropy / max_entropy)
456
+
457
+ return VoteResult(
458
+ decision=winner,
459
+ confidence=confidence,
460
+ consensus_strength=consensus_strength,
461
+ strategy_used=VotingStrategy.BAYESIAN_FUSION,
462
+ individual_votes=[{'prediction': p.get('prediction'), 'confidence': p.get('confidence')} for p in predictions],
463
+ metadata={'posteriors': posteriors, 'entropy': entropy},
464
+ processing_time=0.0
465
+ )
466
+
467
+ def _calculate_consensus_strength(self, predictions: List[Dict[str, Any]]) -> float:
468
+ """Calcula força do consenso entre predições"""
469
+ if not predictions:
470
+ return 0.0
471
+
472
+ # Contar predições por classe
473
+ counts = defaultdict(int)
474
+ for pred in predictions:
475
+ counts[pred.get('prediction', 'NEUTRO')] += 1
476
+
477
+ # Calcular consenso
478
+ max_count = max(counts.values())
479
+ consensus = max_count / len(predictions)
480
+
481
+ return consensus
482
+
483
+ def _empty_vote_result(self, strategy: VotingStrategy, start_time: datetime) -> VoteResult:
484
+ """Resultado para quando não há predições"""
485
+ return VoteResult(
486
+ decision="NEUTRO",
487
+ confidence=0.0,
488
+ consensus_strength=0.0,
489
+ strategy_used=strategy,
490
+ individual_votes=[],
491
+ metadata={'error': 'no_predictions'},
492
+ processing_time=(datetime.now() - start_time).total_seconds()
493
+ )
494
+
495
+ def update_strategy_performance(self, strategy: VotingStrategy, accuracy: float):
496
+ """Atualiza performance de uma estratégia"""
497
+ self.strategy_performance[strategy].append(accuracy)
498
+
499
+ def get_best_strategy(self) -> VotingStrategy:
500
+ """Retorna a estratégia com melhor performance recente"""
501
+ if not self.strategy_performance:
502
+ return VotingStrategy.ADAPTIVE_ENSEMBLE
503
+
504
+ best_strategy = VotingStrategy.ADAPTIVE_ENSEMBLE
505
+ best_performance = 0.0
506
+
507
+ for strategy, performances in self.strategy_performance.items():
508
+ if len(performances) >= 5: # Mínimo de amostras
509
+ avg_performance = np.mean(list(performances)[-10:]) # Últimas 10
510
+ if avg_performance > best_performance:
511
+ best_performance = avg_performance
512
+ best_strategy = strategy
513
+
514
+ return best_strategy
515
+
516
+ def get_voting_stats(self) -> Dict[str, Any]:
517
+ """Retorna estatísticas do sistema de votação"""
518
+ stats = {
519
+ 'total_votes': len(self.voting_history),
520
+ 'strategy_usage': defaultdict(int),
521
+ 'avg_processing_time': 0.0,
522
+ 'avg_consensus_strength': 0.0,
523
+ 'strategy_performance': {}
524
+ }
525
+
526
+ if self.voting_history:
527
+ # Contar uso de estratégias
528
+ for vote in self.voting_history:
529
+ stats['strategy_usage'][vote['strategy'].value] += 1
530
+
531
+ # Calcular médias
532
+ processing_times = [vote['result'].processing_time for vote in self.voting_history]
533
+ consensus_strengths = [vote['result'].consensus_strength for vote in self.voting_history]
534
+
535
+ stats['avg_processing_time'] = np.mean(processing_times)
536
+ stats['avg_consensus_strength'] = np.mean(consensus_strengths)
537
+
538
+ # Performance das estratégias
539
+ for strategy, performances in self.strategy_performance.items():
540
+ if performances:
541
+ stats['strategy_performance'][strategy.value] = {
542
+ 'avg_accuracy': np.mean(list(performances)),
543
+ 'recent_accuracy': np.mean(list(performances)[-10:]) if len(performances) >= 10 else np.mean(list(performances)),
544
+ 'sample_count': len(performances)
545
+ }
546
+
547
+ return dict(stats)
548
+
549
+ # Instância global do sistema de votação
550
+ voting_system = IntelligentVotingSystem()
551
+
552
+ # Função de conveniência
553
+ def intelligent_vote(predictions: List[Dict[str, Any]],
554
+ strategy: VotingStrategy = VotingStrategy.ADAPTIVE_ENSEMBLE,
555
+ market_context: Optional[Dict[str, float]] = None) -> VoteResult:
556
+ """Função principal para votação inteligente"""
557
+ return voting_system.vote(predictions, strategy, market_context)
558
+
559
+ if __name__ == "__main__":
560
+ # Teste do sistema
561
+ test_predictions = [
562
+ {'model_name': 'FinBERT', 'prediction': 'POSITIVO', 'confidence': 0.8, 'sentiment_score': 0.6},
563
+ {'model_name': 'DistilBERT', 'prediction': 'POSITIVO', 'confidence': 0.7, 'sentiment_score': 0.4},
564
+ {'model_name': 'RoBERTa', 'prediction': 'NEUTRO', 'confidence': 0.6, 'sentiment_score': 0.1},
565
+ {'model_name': 'BERT', 'prediction': 'POSITIVO', 'confidence': 0.9, 'sentiment_score': 0.7}
566
+ ]
567
+
568
+ print("Testando sistema de votação inteligente...")
569
+
570
+ for strategy in VotingStrategy:
571
+ result = intelligent_vote(test_predictions, strategy)
572
+ print(f"\nEstratégia: {strategy.value}")
573
+ print(f"Decisão: {result.decision}")
574
+ print(f"Confiança: {result.confidence:.3f}")
575
+ print(f"Consenso: {result.consensus_strength:.3f}")
576
+ print(f"Tempo: {result.processing_time:.3f}s")
src/analysis/__pycache__/sentiment_analysis.cpython-313.pyc ADDED
Binary file (19.4 kB). View file
 
src/analysis/sentiment_analysis.py CHANGED
@@ -1,12 +1,22 @@
1
- """Módulo de análise de sentimento usando IA financeira."""
2
 
3
  import re
 
4
  from typing import Dict, Optional, Any
5
  from dataclasses import dataclass
6
 
7
  from config.config import FINANCIAL_MODELS, AIConfig, AppConfig
8
 
9
- # Importações opcionais para IA
 
 
 
 
 
 
 
 
 
10
  try:
11
  from transformers import pipeline
12
  import torch
@@ -284,32 +294,66 @@ class SentimentScorer:
284
 
285
 
286
  class SentimentAnalysisEngine:
287
- """Engine principal de análise de sentimento."""
288
 
289
  def __init__(self):
 
 
 
 
290
  self.model_manager = ModelManager()
291
  self.analyzer = SentimentAnalyzer(self.model_manager)
292
  self.scorer = SentimentScorer()
 
 
 
 
293
 
294
  def analyze_text(self, text: str) -> Dict[str, Any]:
295
- """Executa análise completa de sentimento."""
296
- # Análise de sentimento
297
- sentiment_result = self.analyzer.analyze(text)
298
 
299
- # Calcular pontuação
300
- score = self.scorer.calculate_sentiment_score(sentiment_result)
 
 
 
 
 
301
 
302
- # Gerar descrição
 
 
303
  description = self.scorer.get_sentiment_signal_description(sentiment_result)
304
 
305
  return {
306
  'result': sentiment_result,
307
  'score': score,
308
- 'description': description
 
309
  }
310
 
311
  def get_model_status(self) -> Dict[str, Any]:
312
- """Retorna status do modelo atual."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
313
  if self.model_manager.is_model_available():
314
  model_info = self.model_manager.get_model_info()
315
  return {
@@ -328,4 +372,78 @@ class SentimentAnalysisEngine:
328
 
329
  def is_available(self) -> bool:
330
  """Verifica se análise de IA está disponível."""
331
- return self.model_manager.is_model_available()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Módulo de análise de sentimento usando IA financeira com sistema Ensemble."""
2
 
3
  import re
4
+ import asyncio
5
  from typing import Dict, Optional, Any
6
  from dataclasses import dataclass
7
 
8
  from config.config import FINANCIAL_MODELS, AIConfig, AppConfig
9
 
10
+ # Importações do sistema Ensemble
11
+ try:
12
+ from src.ai.ensemble_ai import ensemble_ai, EnsembleResult
13
+ from src.ai.voting_system import intelligent_vote, VotingStrategy
14
+ ENSEMBLE_AVAILABLE = True
15
+ except ImportError:
16
+ ENSEMBLE_AVAILABLE = False
17
+ print("Sistema Ensemble não disponível, usando fallback...")
18
+
19
+ # Importações opcionais para IA (fallback)
20
  try:
21
  from transformers import pipeline
22
  import torch
 
294
 
295
 
296
  class SentimentAnalysisEngine:
297
+ """Engine principal de análise de sentimento com sistema Ensemble."""
298
 
299
  def __init__(self):
300
+ # Sistema Ensemble (preferido)
301
+ self.ensemble_available = ENSEMBLE_AVAILABLE
302
+
303
+ # Sistema tradicional (fallback)
304
  self.model_manager = ModelManager()
305
  self.analyzer = SentimentAnalyzer(self.model_manager)
306
  self.scorer = SentimentScorer()
307
+
308
+ # Configurações do ensemble
309
+ self.voting_strategy = VotingStrategy.ADAPTIVE_ENSEMBLE
310
+ self.use_ensemble = self.ensemble_available
311
 
312
  def analyze_text(self, text: str) -> Dict[str, Any]:
313
+ """Executa análise completa de sentimento usando sistema Ensemble ou fallback."""
314
+ if not text:
315
+ return self._get_empty_result()
316
 
317
+ # Usar sistema Ensemble se disponível
318
+ if self.use_ensemble and self.ensemble_available:
319
+ try:
320
+ return self._analyze_with_ensemble(text)
321
+ except Exception as e:
322
+ print(f"Erro no sistema Ensemble, usando fallback: {e}")
323
+ # Continuar com sistema tradicional
324
 
325
+ # Sistema tradicional (fallback)
326
+ sentiment_result = self.analyzer.analyze(text)
327
+ score = self.scorer.calculate_sentiment_score(sentiment_result)
328
  description = self.scorer.get_sentiment_signal_description(sentiment_result)
329
 
330
  return {
331
  'result': sentiment_result,
332
  'score': score,
333
+ 'description': description,
334
+ 'ensemble_used': False
335
  }
336
 
337
  def get_model_status(self) -> Dict[str, Any]:
338
+ """Retorna status dos modelos de IA (Ensemble + Fallback)."""
339
+ if self.use_ensemble and self.ensemble_available:
340
+ # Status do sistema Ensemble
341
+ try:
342
+ ensemble_stats = ensemble_ai.get_performance_stats()
343
+ active_models = len([m for m in ensemble_ai.models if m.is_available])
344
+
345
+ return {
346
+ 'available': True,
347
+ 'model_name': f'Ensemble AI ({active_models} modelos)',
348
+ 'description': f'Sistema Ensemble com {active_models} modelos ativos',
349
+ 'status': 'active',
350
+ 'ensemble_stats': ensemble_stats,
351
+ 'voting_strategy': self.voting_strategy.value
352
+ }
353
+ except Exception as e:
354
+ print(f"Erro ao obter status do Ensemble: {e}")
355
+
356
+ # Status do sistema tradicional
357
  if self.model_manager.is_model_available():
358
  model_info = self.model_manager.get_model_info()
359
  return {
 
372
 
373
  def is_available(self) -> bool:
374
  """Verifica se análise de IA está disponível."""
375
+ return (self.use_ensemble and self.ensemble_available) or self.model_manager.is_model_available()
376
+
377
+ def _get_empty_result(self) -> Dict[str, Any]:
378
+ """Retorna resultado vazio para texto inválido."""
379
+ from dataclasses import asdict
380
+ empty_result = SentimentResult(
381
+ sentiment='neutral',
382
+ confidence=0.5,
383
+ label='NEUTRO',
384
+ model_used='empty_input'
385
+ )
386
+ return {
387
+ 'result': empty_result,
388
+ 'score': 0,
389
+ 'description': 'Texto vazio ou inválido',
390
+ 'ensemble_used': False
391
+ }
392
+
393
+ def _analyze_with_ensemble(self, text: str) -> Dict[str, Any]:
394
+ """Analisa texto usando sistema Ensemble."""
395
+ # Executar análise ensemble de forma síncrona
396
+ loop = None
397
+ try:
398
+ loop = asyncio.get_event_loop()
399
+ except RuntimeError:
400
+ loop = asyncio.new_event_loop()
401
+ asyncio.set_event_loop(loop)
402
+
403
+ if loop.is_running():
404
+ # Se já há um loop rodando, criar uma task
405
+ import concurrent.futures
406
+ with concurrent.futures.ThreadPoolExecutor() as executor:
407
+ future = executor.submit(asyncio.run, ensemble_ai.analyze_sentiment(text))
408
+ ensemble_result = future.result()
409
+ else:
410
+ # Executar diretamente
411
+ ensemble_result = loop.run_until_complete(ensemble_ai.analyze_sentiment(text))
412
+
413
+ # Converter resultado do ensemble para formato compatível
414
+ sentiment_result = SentimentResult(
415
+ sentiment=ensemble_result.final_prediction.lower(),
416
+ confidence=ensemble_result.confidence,
417
+ label=ensemble_result.final_prediction,
418
+ model_used=f'Ensemble ({len(ensemble_result.individual_predictions)} modelos)'
419
+ )
420
+
421
+ sentiment_score = self._convert_sentiment_to_score(ensemble_result.sentiment_score)
422
+ description = self.scorer.get_sentiment_signal_description(sentiment_result)
423
+
424
+ return {
425
+ 'result': sentiment_result,
426
+ 'score': sentiment_score,
427
+ 'description': description,
428
+ 'ensemble_used': True,
429
+ 'ensemble_details': {
430
+ 'consensus_strength': ensemble_result.consensus_strength,
431
+ 'processing_time': ensemble_result.processing_time,
432
+ 'individual_predictions': ensemble_result.individual_predictions,
433
+ 'model_weights': ensemble_result.model_weights
434
+ }
435
+ }
436
+
437
+ def _convert_sentiment_to_score(self, sentiment_score: float) -> int:
438
+ """Converte score de sentimento (-1 a 1) para escala de pontos."""
439
+ # Converter de [-1, 1] para [0, 100]
440
+ normalized_score = (sentiment_score + 1) / 2
441
+ return int(normalized_score * 100)
442
+
443
+ def set_voting_strategy(self, strategy):
444
+ """Define estratégia de votação do ensemble."""
445
+ self.voting_strategy = strategy
446
+
447
+ def toggle_ensemble(self, use_ensemble: bool):
448
+ """Ativa/desativa uso do sistema Ensemble."""
449
+ self.use_ensemble = use_ensemble and self.ensemble_available