José Wilson committed on
Commit ·
44714bc
1
Parent(s): f3fd845
update fleven-server
Browse files- .gitignore +22 -0
- Dockerfile +26 -0
- estrutura.txt +0 -0
- fleven/__init__.py +0 -0
- fleven/analysis.py +354 -0
- fleven/client.py +285 -0
- fleven/collector.py +95 -0
- fleven/mlflow_utils.py +162 -0
- fleven/server.py +358 -0
- fleven/utils.py +284 -0
- images/mlflow_print.png +0 -0
- pyproject.toml +149 -0
.gitignore
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Python
__pycache__/
*.pyc
venv/

# Flower
.flwr/
*.fab

# Results and experiment artifacts
results/
metrics/
mlartifacts/
mlruns/
data/

# Model weights
*.pt
*.pth

# Jupyter Notebooks
*.ipynb_checkpoints
DataClientCreate.ipynb
Dockerfile
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Slim Python base image.
FROM python:3.11-slim

# Working directory inside the container.
WORKDIR /app

# Copy the entire project (the 'fleven' package, pyproject.toml, etc.)
# into /app in the container.
COPY . .

# Install the project and the dependencies declared in pyproject.toml
# ("." installs the project in the current directory).
RUN pip install --no-cache-dir .

# Port the Flower server listens on inside the container.
# NOTE(review): Hugging Face Spaces routes external traffic to 7860 by
# default — confirm the Space is configured to forward to 8080.
EXPOSE 8080

# Start the Flower SERVER process (do NOT use 'flwr run .', which is for
# simulation). '--address 0.0.0.0:8080' listens on all interfaces inside
# the container.
# NOTE(review): the flower-supernode CLI flags changed across Flower
# releases ('--app'/'--address' may not exist in all versions) — verify
# against the installed flwr version.
CMD ["flower-supernode", \
     "--app", "fleven.server:app", \
     "--address", "0.0.0.0:8080"]
estrutura.txt
ADDED
|
Binary file (3.71 kB). View file
|
|
|
fleven/__init__.py
ADDED
|
File without changes
|
fleven/analysis.py
ADDED
|
@@ -0,0 +1,354 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Funções para análise e visualização de resultados."""
|
| 2 |
+
import matplotlib
|
| 3 |
+
matplotlib.use("Agg")
|
| 4 |
+
|
| 5 |
+
import pandas as pd
|
| 6 |
+
import numpy as np
|
| 7 |
+
import matplotlib.pyplot as plt
|
| 8 |
+
import seaborn as sns
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
import json
|
| 11 |
+
from datetime import datetime
|
| 12 |
+
from fleven.collector import MetricsCollector
|
| 13 |
+
|
| 14 |
+
plt.style.use('seaborn-v0_8-darkgrid')
|
| 15 |
+
sns.set_palette("husl")
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def create_visualizations(collector: "MetricsCollector", output_dir: Path):
    """Create the 2x2 performance dashboard plus convergence and heatmap plots.

    Bug fix: the dynamically sized per-client palette was previously
    overwritten by a hard-coded 3-color list, which raised IndexError
    whenever more than three clients participated.

    Args:
        collector: metrics collector holding train/eval series per round.
        output_dir: directory where the PDF figures are written.
    """
    client_ids = collector.active_client_ids
    print(f"Analisando clientes com IDs: {client_ids}")

    # One color per active client (scales to any number of clients).
    colors = sns.color_palette("husl", n_colors=len(client_ids))
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    # Subplot 1: global train/eval loss.
    ax1 = axes[0, 0]
    rounds = collector.train_metrics["rounds"]
    if len(rounds) > 0 and len(collector.train_metrics["global_train_loss"]) > 0:
        ax1.plot(rounds, collector.train_metrics["global_train_loss"],
                 'b-', marker='s', label='Treino Global', linewidth=2)
    if len(collector.eval_metrics["rounds"]) > 0 and len(collector.eval_metrics["global_eval_loss"]) > 0:
        ax1.plot(collector.eval_metrics["rounds"], collector.eval_metrics["global_eval_loss"],
                 'r-', marker='o', label='Validação Global', linewidth=2)
    ax1.set_title('Desempenho do Modelo Global', fontsize=14, fontweight='bold')
    ax1.set_xlabel('Rodada')
    ax1.set_ylabel('Perda (MSE)')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # Subplot 2: per-client training loss.
    ax2 = axes[0, 1]
    for idx, client_id in enumerate(client_ids):
        key = f"client_{client_id}_train_loss"
        if key in collector.train_metrics and collector.train_metrics[key]:
            client_losses = collector.train_metrics[key]
            if len(client_losses) > 0:
                # Only plot when x/y lengths line up (a client may miss rounds).
                client_rounds = [r for r_idx, r in enumerate(rounds) if r_idx < len(client_losses)]
                if len(client_rounds) == len(client_losses):
                    ax2.plot(client_rounds, client_losses,
                             marker='o', label=f'Cliente {client_id}', color=colors[idx], linewidth=1.5)

    if len(rounds) > 0 and len(collector.train_metrics["global_train_loss"]) > 0:
        ax2.plot(rounds, collector.train_metrics["global_train_loss"],
                 'k--', label='Média Global', linewidth=2, alpha=0.7)
    ax2.set_title('Perda de Treinamento por Cliente', fontsize=14, fontweight='bold')
    ax2.set_xlabel('Rodada')
    ax2.set_ylabel('Perda de Treino (MSE)')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    # Subplot 3: per-client validation loss.
    ax3 = axes[1, 0]
    eval_rounds = collector.eval_metrics["rounds"]
    for idx, client_id in enumerate(client_ids):
        key = f"client_{client_id}_eval_loss"
        if key in collector.eval_metrics and collector.eval_metrics[key]:
            client_losses = collector.eval_metrics[key]
            if len(client_losses) > 0:
                client_rounds = [r for r_idx, r in enumerate(eval_rounds) if r_idx < len(client_losses)]
                if len(client_rounds) == len(client_losses):
                    ax3.plot(client_rounds, client_losses,
                             marker='s', label=f'Cliente {client_id}', color=colors[idx], linewidth=1.5)

    if len(eval_rounds) > 0 and len(collector.eval_metrics["global_eval_loss"]) > 0:
        ax3.plot(eval_rounds, collector.eval_metrics["global_eval_loss"],
                 'k--', label='Média Global', linewidth=2, alpha=0.7)
    ax3.set_title('Perda de Validação por Cliente', fontsize=14, fontweight='bold')
    ax3.set_xlabel('Rodada')
    ax3.set_ylabel('Perda de Validação (MSE)')
    ax3.legend()
    ax3.grid(True, alpha=0.3)

    # Subplot 4: round-over-round delta of the global losses.
    ax4 = axes[1, 1]
    if len(rounds) > 1 and len(collector.train_metrics["global_train_loss"]) > 1:
        train_improvement = np.diff(collector.train_metrics["global_train_loss"])
        ax4.plot(rounds[1:], train_improvement, 'g-', marker='v', label='Δ Treino', linewidth=1.5)

    if len(eval_rounds) > 1 and len(collector.eval_metrics["global_eval_loss"]) > 1:
        eval_improvement = np.diff(collector.eval_metrics["global_eval_loss"])
        ax4.plot(eval_rounds[1:], eval_improvement,
                 'm-', marker='^', label='Δ Validação', linewidth=1.5)

    ax4.axhline(y=0, color='k', linestyle='--', alpha=0.5)
    ax4.set_title('Taxa de Melhoria (Δ Perda)', fontsize=14, fontweight='bold')
    ax4.set_xlabel('Rodada')
    ax4.set_ylabel('Mudança na Perda')
    ax4.legend()
    ax4.grid(True, alpha=0.3)

    plt.suptitle(f'Análise de Desempenho - Estratégia: {collector.strategy_name.upper()}',
                 fontsize=16, fontweight='bold')
    plt.tight_layout()
    plt.savefig(output_dir / f'performance_analysis_{collector.strategy_name}.pdf', dpi=300, bbox_inches='tight')
    plt.close()

    # Convergence plot and heatmap share the eval round axis.
    _create_convergence_plot(collector, output_dir, eval_rounds)

    _create_heatmap(collector, output_dir, eval_rounds)

    print(f"Visualizações salvas em {output_dir}")
+
|
| 122 |
+
def _create_convergence_plot(collector: "MetricsCollector", output_dir: Path, eval_rounds):
    """Plot cross-client dispersion (variance, std, max-min gap) per eval round."""
    ids = collector.active_client_ids
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))

    # Accumulate per-round dispersion metrics into the collector.
    for i, rnd in enumerate(eval_rounds):
        losses_this_round = []
        for cid in ids:
            key = f"client_{cid}_eval_loss"
            if key in collector.eval_metrics and i < len(collector.eval_metrics[key]):
                losses_this_round.append(collector.eval_metrics[key][i])

        # Dispersion only makes sense with at least two reporting clients.
        if len(losses_this_round) > 1:
            var, std, diff = collector.calculate_convergence_metrics(losses_this_round)
            cm = collector.convergence_metrics
            cm["rounds"].append(rnd)
            cm["loss_variance"].append(var)
            cm["loss_std"].append(std)
            cm["max_min_diff"].append(diff)

    cm = collector.convergence_metrics
    if len(cm["rounds"]) > 0:
        # (metric key, line fmt, marker, fill kwargs, title, y-label) per panel.
        panels = (
            ("loss_variance", 'b-', 'o', {},
             'Variância da Perda entre Clientes', 'Variância'),
            ("loss_std", 'g-', 's', {"color": "green"},
             'Desvio Padrão da Perda entre Clientes', 'Desvio Padrão'),
            ("max_min_diff", 'r-', '^', {"color": "red"},
             'Diferença Máx-Mín entre Clientes', 'Diferença'),
        )
        for ax, (key, fmt, mk, fill_kw, title, ylabel) in zip(axes, panels):
            ax.plot(cm["rounds"], cm[key], fmt, marker=mk, linewidth=2)
            ax.fill_between(cm["rounds"], cm[key], alpha=0.3, **fill_kw)
            ax.set_title(title, fontsize=14, fontweight='bold')
            ax.set_xlabel('Rodada')
            ax.set_ylabel(ylabel)
            ax.grid(True, alpha=0.3)
    else:
        for ax in axes:
            ax.text(0.5, 0.5, 'Dados insuficientes',
                    ha='center', va='center', transform=ax.transAxes)

    plt.suptitle(f'Análise de Convergência e Heterogeneidade - {collector.strategy_name.upper()}',
                 fontsize=16, fontweight='bold')
    plt.tight_layout()
    plt.savefig(output_dir / f'convergence_analysis_{collector.strategy_name}.pdf', dpi=300, bbox_inches='tight')
    plt.close()
| 188 |
+
|
| 189 |
+
def _create_heatmap(collector: "MetricsCollector", output_dir: Path, eval_rounds):
    """Heatmap of per-client validation loss per round (missing cells masked)."""
    ids = collector.active_client_ids
    fig, ax = plt.subplots(figsize=(12, 6))

    n_rounds = len(eval_rounds) if eval_rounds else 0
    keys = [f"client_{cid}_eval_loss" for cid in ids]
    has_data = any(k in collector.eval_metrics and collector.eval_metrics[k] for k in keys)

    title = f'Mapa de Calor - Perda de Validação por Cliente - {collector.strategy_name.upper()}'

    if has_data and n_rounds > 0:
        # One row per client, padded with NaN up to n_rounds.
        rows = []
        for k in keys:
            series = list(collector.eval_metrics[k]) if (k in collector.eval_metrics and collector.eval_metrics[k]) else []
            if series:
                series += [np.nan] * (n_rounds - len(series))
                rows.append(series[:n_rounds])
            else:
                rows.append([np.nan] * n_rounds)

        grid = np.ma.masked_invalid(np.array(rows, dtype=float))
        im = ax.imshow(grid, aspect='auto', cmap='RdYlGn_r')

        ax.set_xticks(range(n_rounds))
        ax.set_xticklabels(eval_rounds[:n_rounds])
        ax.set_yticks(range(len(ids)))
        ax.set_yticklabels([f'Cliente {cid}' for cid in ids])

        ax.set_xlabel('Rodada', fontsize=12)
        ax.set_title(title, fontsize=14, fontweight='bold')

        # Annotate each valid cell with its loss value.
        for i, row in enumerate(rows):
            for j in range(min(len(row), n_rounds)):
                if not np.isnan(row[j]):
                    ax.text(j, i, f'{row[j]:.4f}',
                            ha="center", va="center", color="black", fontsize=8)

        plt.colorbar(im, ax=ax, label='Perda (MSE)')
    else:
        ax.text(0.5, 0.5, 'Dados insuficientes para gerar heatmap',
                ha='center', va='center', transform=ax.transAxes, fontsize=12)
        ax.set_title(title, fontsize=14, fontweight='bold')

    plt.tight_layout()
    plt.savefig(output_dir / f'heatmap_performance_{collector.strategy_name}.pdf', dpi=300, bbox_inches='tight')
    plt.close()
| 246 |
+
|
| 247 |
+
def save_detailed_metrics(collector: "MetricsCollector", output_dir: Path):
    """Persist round-by-round metrics (CSV), summary statistics (JSON) and a text report."""

    # Per-round CSV dumps for each phase that has data.
    for records, phase, label in (
        (collector.train_metrics_by_round, 'train', 'treino'),
        (collector.eval_metrics_by_round, 'eval', 'avaliação'),
    ):
        if records:
            df = pd.DataFrame(records)
            df['phase'] = phase
            csv_path = output_dir / f'{phase}_metrics_{collector.strategy_name}.csv'
            df.to_csv(csv_path, index=False)
            print(f"Métricas de {label} salvas em {csv_path}")

    # Aggregate statistics over the whole run.
    tr = collector.train_metrics["global_train_loss"]
    ev = collector.eval_metrics["global_eval_loss"]
    stats = {
        "strategy": collector.strategy_name,
        "total_rounds": len(collector.train_metrics["rounds"]),
        "final_global_train_loss": float(tr[-1]) if tr else None,
        "final_global_eval_loss": float(ev[-1]) if ev else None,
        "train_improvement": float(tr[0] - tr[-1]) if len(tr) > 1 else 0,
        "eval_improvement": float(ev[0] - ev[-1]) if len(ev) > 1 else 0,
        "timestamp": datetime.now().isoformat(),
    }

    conv = collector.convergence_metrics
    if conv["rounds"]:
        stats["convergence_metrics"] = {
            "rounds": conv["rounds"],
            "final_variance": float(conv["loss_variance"][-1]) if conv["loss_variance"] else None,
            "final_std": float(conv["loss_std"][-1]) if conv["loss_std"] else None,
            "final_max_min_diff": float(conv["max_min_diff"][-1]) if conv["max_min_diff"] else None,
        }

    # JSON export of the aggregate statistics.
    json_file = output_dir / f'analysis_{collector.strategy_name}.json'
    with open(json_file, 'w') as f:
        json.dump(stats, f, indent=2)
    print(f"Análise estatística salva em {json_file}")

    # Human-readable text report.
    _save_summary_text(collector, stats, output_dir)
| 293 |
+
|
| 294 |
+
def _save_summary_text(collector: MetricsCollector, stats: dict, output_dir: Path):
|
| 295 |
+
"""Salva sumário em formato texto."""
|
| 296 |
+
summary_file = output_dir / f'summary_{collector.strategy_name}.txt'
|
| 297 |
+
with open(summary_file, 'w', encoding='utf-8') as f:
|
| 298 |
+
f.write("="*60 + "\n")
|
| 299 |
+
f.write(f"RELATÓRIO DE TREINAMENTO - {collector.strategy_name.upper()}\n")
|
| 300 |
+
f.write("="*60 + "\n\n")
|
| 301 |
+
|
| 302 |
+
f.write(f"Data: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
|
| 303 |
+
f.write(f"Total de rodadas: {stats['total_rounds']}\n\n")
|
| 304 |
+
|
| 305 |
+
if stats['final_global_train_loss']:
|
| 306 |
+
f.write("--- MÉTRICAS DE TREINO ---\n")
|
| 307 |
+
f.write(f"Loss inicial: {collector.train_metrics['global_train_loss'][0]:.6f}\n")
|
| 308 |
+
f.write(f"Loss final: {stats['final_global_train_loss']:.6f}\n")
|
| 309 |
+
f.write(f"Melhoria: {stats['train_improvement']:.6f} ({(stats['train_improvement']/collector.train_metrics['global_train_loss'][0]*100):.2f}%)\n\n")
|
| 310 |
+
|
| 311 |
+
if stats['final_global_eval_loss']:
|
| 312 |
+
f.write("--- MÉTRICAS DE AVALIAÇÃO ---\n")
|
| 313 |
+
f.write(f"Loss inicial: {collector.eval_metrics['global_eval_loss'][0]:.6f}\n")
|
| 314 |
+
f.write(f"Loss final: {stats['final_global_eval_loss']:.6f}\n")
|
| 315 |
+
f.write(f"Melhoria: {stats['eval_improvement']:.6f} ({(stats['eval_improvement']/collector.eval_metrics['global_eval_loss'][0]*100):.2f}%)\n\n")
|
| 316 |
+
|
| 317 |
+
if 'convergence_metrics' in stats:
|
| 318 |
+
f.write("--- MÉTRICAS DE CONVERGÊNCIA ---\n")
|
| 319 |
+
f.write(f"Desvio padrão final: {stats['convergence_metrics']['final_std']:.6f}\n")
|
| 320 |
+
f.write(f"Variância final: {stats['convergence_metrics']['final_variance']:.6f}\n")
|
| 321 |
+
f.write(f"Diferença máx-mín final: {stats['convergence_metrics']['final_max_min_diff']:.6f}\n")
|
| 322 |
+
|
| 323 |
+
print(f"Sumário salvo em {summary_file}")
|
| 324 |
+
|
| 325 |
+
|
| 326 |
+
def print_final_summary(collector: "MetricsCollector"):
    """Print the final training summary to stdout."""
    print("\n" + "=" * 60)
    print("RESUMO DO TREINAMENTO")
    print("=" * 60)

    train_losses = collector.train_metrics["global_train_loss"]
    if train_losses:
        first, last = train_losses[0], train_losses[-1]
        pct = (first - last) / first * 100
        print(f"Perda inicial de treino: {first:.6f}")
        print(f"Perda final de treino: {last:.6f}")
        print(f"Melhoria no treino: {pct:.2f}%")

    eval_losses = collector.eval_metrics["global_eval_loss"]
    if eval_losses:
        first, last = eval_losses[0], eval_losses[-1]
        pct = (first - last) / first * 100
        print(f"\nPerda inicial de validação: {first:.6f}")
        print(f"Perda final de validação: {last:.6f}")
        print(f"Melhoria na validação: {pct:.2f}%")

    # Cross-client convergence: smaller final spread = closer agreement.
    stds = collector.convergence_metrics["loss_std"]
    if stds:
        final_std = stds[-1]
        print(f"\nDesvio padrão final entre clientes: {final_std:.6f}")
        print(f"Convergência: {'Boa' if final_std < 0.01 else 'Moderada' if final_std < 0.05 else 'Baixa'}")
fleven/client.py
ADDED
|
@@ -0,0 +1,285 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from flwr.app import Context, Message, ArrayRecord, MetricRecord, RecordDict
|
| 3 |
+
from flwr.clientapp import ClientApp
|
| 4 |
+
from fleven.utils import get_model, load_data, train, test, set_seed
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
import json
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
|
| 9 |
+
# Verifica se a GPU está disponível
|
| 10 |
+
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
| 11 |
+
|
| 12 |
+
class MetricsTracker:
    """Tracks and persists a client's local train/eval metrics and model checkpoints.

    Improvement vs. original: ``add_train_metrics`` and ``add_eval_metrics``
    were byte-for-byte duplicated upsert logic; both now delegate to a single
    private ``_upsert`` helper. Public interface unchanged.
    """

    def __init__(self, client_id, metrics_base_path=None):
        self.client_id = client_id

        # Resolve the metrics directory robustly: an explicit base path wins,
        # otherwise fall back to <repo root>/metrics relative to this file.
        if metrics_base_path:
            self.metrics_dir = Path(metrics_base_path) / f"client_{self.client_id}"
        else:
            base_dir = Path(__file__).parent.parent
            self.metrics_dir = base_dir / "metrics" / f"client_{self.client_id}"

        self.metrics_dir.mkdir(parents=True, exist_ok=True)
        self.history_file = self.metrics_dir / "metrics_history.json"
        self.history = self.load_history()

        print(f"[Cliente {self.client_id}] Métricas serão salvas em: {self.metrics_dir.absolute()}")

    def load_history(self):
        """Load previously saved history from disk, or start a fresh one."""
        if self.history_file.exists():
            with open(self.history_file, 'r') as f:
                return json.load(f)
        return {"train": [], "eval": []}

    def get_next_round_number(self):
        """Return the next round number based on the training history (1 if empty)."""
        if not self.history["train"]:
            return 1
        return self.history["train"][-1]["round"] + 1

    def _upsert(self, phase, round_num, loss):
        """Insert or update the entry for ``round_num`` in ``self.history[phase]``."""
        for entry in self.history[phase]:
            if entry["round"] == round_num:
                entry["loss"] = loss
                entry["timestamp"] = datetime.now().isoformat()
                return
        self.history[phase].append({
            "round": round_num,
            "loss": loss,
            "timestamp": datetime.now().isoformat(),
        })

    def add_train_metrics(self, round_num, loss):
        """Record (or overwrite) the training loss for a round."""
        self._upsert("train", round_num, loss)

    def add_eval_metrics(self, round_num, loss):
        """Record (or overwrite) the evaluation loss for a round."""
        self._upsert("eval", round_num, loss)

    def save_metrics(self):
        """Best-effort dump of the history to JSON (logs instead of raising)."""
        try:
            with open(self.history_file, 'w') as f:
                json.dump(self.history, f, indent=4)
            print(f"[Cliente {self.client_id}] Métricas salvas com sucesso")
        except Exception as e:
            print(f"[Cliente {self.client_id}] AVISO: Não foi possível salvar métricas: {e}")

    def save_checkpoint(self, net, round_num):
        """Best-effort save of the model's state dict for ``round_num``."""
        try:
            model_path = self.metrics_dir / f"model_round_{round_num}.pt"
            torch.save(net.state_dict(), model_path)
            print(f"[Cliente {self.client_id}] Checkpoint salvo: {model_path}")
        except Exception as e:
            print(f"[Cliente {self.client_id}] AVISO: Não foi possível salvar checkpoint: {e}")
|
| 91 |
+
|
| 92 |
+
# Per-process cache of the client's state, so data and model are not
# reloaded on every round.
model_state = dict.fromkeys(
    ("net", "trainloader", "testloader", "metrics_tracker", "client_id")
)
|
| 100 |
+
|
| 101 |
+
def initialize_client_state(client_id: int, context: Context):
    """Lazily initialize per-client state (seed, data, model, tracker) from the run config."""
    # Already initialized for this client — nothing to do.
    if model_state["client_id"] == client_id:
        return

    cfg = context.run_config

    # Per-client seed derived from the global one, for reproducibility.
    global_seed = int(cfg.get("seed", 42))
    client_seed = global_seed + client_id
    set_seed(client_seed)
    print(f"[Cliente {client_id}] Usando seed local para replicação: {client_seed}")

    print(f"Ator [Node {context.node_id}] a inicializar para Client ID: {client_id}")

    # Data-loading configuration.
    sequence_length = int(cfg.get("sequence-length", 60))
    prediction_length = int(cfg.get("prediction-length", 10))
    batch_size = int(cfg.get("batch-size", 32))
    train_test_split = float(cfg.get("train-test-split", 0.8))

    target_column = str(cfg.get("target-column", "P_kW"))
    print(f"[Cliente {client_id}] Configurado para prever a coluna: '{target_column}'")

    # Model architecture configuration.
    model_type = cfg.get("model-type", "lstm")
    num_layers = int(cfg.get("num-layers", 1))
    hidden_size = int(cfg.get("hidden-size", 32))            # "lstm" and "mlp"
    lstm_hidden_size = int(cfg.get("lstm-hidden-size", 32))  # "lstm_dense"
    dense_hidden_size = int(cfg.get("dense-hidden-size", 16))
    dropout = float(cfg.get("dropout", 0.0))                 # "lstm" and "lstm_dense"

    # Optional filesystem overrides for data and metrics locations.
    data_base_path = cfg.get("data-base-path", None)
    metrics_base_path = cfg.get("metrics-base-path", None)

    trainloader, testloader, num_features = load_data(
        client_id,
        sequence_length,
        prediction_length,
        batch_size,
        train_test_split,
        data_base_path=data_base_path,
        target_column=target_column,
    )

    # Full model configuration passed to the factory.
    model_config = {
        "name": model_type,
        "input_size": num_features,  # reported by load_data
        "output_size": prediction_length,
        "num_layers": num_layers,
        "sequence_length": sequence_length,
        "hidden_size": hidden_size,
        "lstm_hidden_size": lstm_hidden_size,
        "dense_hidden_size": dense_hidden_size,
        "dropout": dropout,
    }

    net = get_model(model_config).to(DEVICE)

    model_state.update({
        "net": net,
        "trainloader": trainloader,
        "testloader": testloader,
        "metrics_tracker": MetricsTracker(client_id, metrics_base_path),
        "client_id": client_id,
    })
    print(f"[Cliente {client_id}] inicializado com {len(trainloader.dataset)} amostras de treino.")
|
| 183 |
+
|
| 184 |
+
# Cria a aplicação cliente
|
| 185 |
+
app = ClientApp()
|
| 186 |
+
|
| 187 |
+
@app.train()
|
| 188 |
+
def train_fn(msg: Message, context: Context) -> Message:
|
| 189 |
+
"""Treina o modelo localmente."""
|
| 190 |
+
# 🔧 faz um mapeamento partition-id para client_id
|
| 191 |
+
partition_id = int(context.node_config["partition-id"])
|
| 192 |
+
client_id = partition_id
|
| 193 |
+
|
| 194 |
+
print(f"[DEBUG] Train - partition-id={partition_id} -> client_id={client_id}")
|
| 195 |
+
|
| 196 |
+
initialize_client_state(client_id, context)
|
| 197 |
+
|
| 198 |
+
net = model_state["net"]
|
| 199 |
+
trainloader = model_state["trainloader"]
|
| 200 |
+
metrics_tracker = model_state["metrics_tracker"]
|
| 201 |
+
|
| 202 |
+
# Obtém o próximo número de rodada do histórico
|
| 203 |
+
#round_num = metrics_tracker.get_next_round_number()
|
| 204 |
+
round_num = int(msg.content["config"].get("server-round", 0))
|
| 205 |
+
|
| 206 |
+
print(f"\n[Node {context.node_id}, Cliente {client_id}] === Ronda de Treino {round_num} ===")
|
| 207 |
+
|
| 208 |
+
arrays = msg.content["arrays"]
|
| 209 |
+
net.load_state_dict(arrays.to_torch_state_dict())
|
| 210 |
+
|
| 211 |
+
# Lê configurações de treino do context
|
| 212 |
+
local_epochs = int(context.run_config.get("local-epochs", 1))
|
| 213 |
+
learning_rate = float(context.run_config.get("learning-rate", 1e-5))
|
| 214 |
+
max_grad_norm = float(context.run_config.get("max-grad-norm", 1.0))
|
| 215 |
+
save_checkpoint_every = int(context.run_config.get("save-checkpoint-every", 5))
|
| 216 |
+
|
| 217 |
+
avg_train_loss = train(
|
| 218 |
+
net,
|
| 219 |
+
trainloader,
|
| 220 |
+
epochs=local_epochs,
|
| 221 |
+
learning_rate=learning_rate,
|
| 222 |
+
max_grad_norm=max_grad_norm,
|
| 223 |
+
device=DEVICE
|
| 224 |
+
)
|
| 225 |
+
print(f"[Node {context.node_id}, Cliente {client_id}] Perda de treino: {avg_train_loss:.6f}")
|
| 226 |
+
|
| 227 |
+
metrics_tracker.add_train_metrics(round_num, avg_train_loss)
|
| 228 |
+
if round_num % save_checkpoint_every == 0:
|
| 229 |
+
metrics_tracker.save_checkpoint(net, round_num)
|
| 230 |
+
metrics_tracker.save_metrics()
|
| 231 |
+
|
| 232 |
+
model_record = ArrayRecord(net.state_dict())
|
| 233 |
+
metrics = MetricRecord({
|
| 234 |
+
"train_loss": avg_train_loss,
|
| 235 |
+
"num-examples": len(trainloader.dataset),
|
| 236 |
+
"client_id": client_id,
|
| 237 |
+
})
|
| 238 |
+
|
| 239 |
+
content = RecordDict({"arrays": model_record, "metrics": metrics})
|
| 240 |
+
return Message(content=content, reply_to=msg)
|
| 241 |
+
|
| 242 |
+
@app.evaluate()
def evaluate_fn(msg: Message, context: Context) -> Message:
    """Avalia o modelo localmente."""
    # partition-id -> client_id mapping (currently the identity mapping).
    cid = int(context.node_config["partition-id"])

    print(f"[DEBUG] Evaluate - partition-id={cid} -> client_id={cid}")

    initialize_client_state(cid, context)

    net = model_state["net"]
    loader = model_state["testloader"]
    tracker = model_state["metrics_tracker"]

    # Use the server-provided round number once training has started;
    # before any training round was recorded, fall back to round 1.
    if tracker.history["train"]:
        current_round = int(msg.content["config"].get("server-round", 0))
    else:
        current_round = 1

    # Load the global parameters sent by the server.
    net.load_state_dict(msg.content["arrays"].to_torch_state_dict())

    loss, num_examples = test(net, loader, device=DEVICE)
    print(f"[Node {context.node_id}, Cliente {cid}] Perda de validação: {loss:.6f}")

    tracker.add_eval_metrics(current_round, loss)
    tracker.save_metrics()

    # Evaluation replies carry metrics only (no weights).
    reply = RecordDict(
        {
            "metrics": MetricRecord(
                {
                    "eval_loss": loss,
                    "num-examples": num_examples,
                    "client_id": cid,
                }
            )
        }
    )
    return Message(content=reply, reply_to=msg)
|
| 282 |
+
|
| 283 |
+
if __name__ == "__main__":
    # Informational only: the app is launched via the Flower CLI / SuperNode.
    for line in (
        "Cliente pronto para ser executado com Flower 1.22.0",
        "Use: flwr run . ou flower-supernode para deployment",
    ):
        print(line)
|
fleven/collector.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Coletor de métricas para análise de treinamento federado."""
|
| 2 |
+
import numpy as np
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class MetricsCollector:
    """Collects and organizes training and validation metrics.

    Per-round metric dicts are accumulated for training and evaluation.
    For backward compatibility the keys for clients 0..2 are always
    present (with ``None`` when a client did not report); metrics from
    any additional client ids are now captured as well instead of being
    silently dropped.
    """

    # Client ids whose per-round keys are always materialized (legacy contract).
    _DEFAULT_CLIENTS = (0, 1, 2)

    def __init__(self, strategy_name):
        self.strategy_name = strategy_name
        # One dict per round, e.g. {"round": 1, "global_train_loss": ..., "client_0_train_loss": ...}
        self.train_metrics_by_round = []
        self.eval_metrics_by_round = []

        # Cross-client dispersion metrics (filled by callers via
        # calculate_convergence_metrics).
        self.convergence_metrics = {
            "rounds": [],
            "loss_variance": [],
            "loss_std": [],
            "max_min_diff": []
        }

    @property
    def active_client_ids(self):
        """
        Inspect the collected data and return a sorted list of unique
        client ids that submitted metrics.
        """
        ids = set()
        all_metrics_by_round = self.train_metrics_by_round + self.eval_metrics_by_round

        for round_data in all_metrics_by_round:
            for key in round_data.keys():
                if key.startswith("client_"):
                    # Extract the number from 'client_X_...'
                    try:
                        ids.add(int(key.split("_")[1]))
                    except (ValueError, IndexError):
                        # Ignore keys that do not follow the expected pattern
                        continue

        return sorted(ids)

    def _add_round(self, storage, round_num, metrics, suffix):
        """Append one round's metrics to *storage* (shared train/eval logic)."""
        round_data = {
            "round": round_num,
            f"global_{suffix}": metrics.get(f"global_{suffix}", None),
        }
        # Legacy keys for clients 0..2 are always present.
        for cid in self._DEFAULT_CLIENTS:
            key = f"client_{cid}_{suffix}"
            round_data[key] = metrics.get(key, None)
        # Generalization: also capture metrics from any other client id.
        for key, value in metrics.items():
            if key.startswith("client_") and key.endswith(f"_{suffix}") and key not in round_data:
                round_data[key] = value
        storage.append(round_data)

    def add_train_round(self, round_num, metrics):
        """Adiciona métricas de uma rodada de treinamento."""
        self._add_round(self.train_metrics_by_round, round_num, metrics, "train_loss")

    def add_eval_round(self, round_num, metrics):
        """Adiciona métricas de uma rodada de avaliação."""
        self._add_round(self.eval_metrics_by_round, round_num, metrics, "eval_loss")

    def calculate_convergence_metrics(self, client_losses):
        """Calcula métricas de convergência entre clientes.

        Returns (variance, std_dev, max-min difference); (0, 0, 0) when
        *client_losses* is empty.
        """
        if len(client_losses) > 0:
            variance = np.var(client_losses)
            std_dev = np.std(client_losses)
            max_min_diff = max(client_losses) - min(client_losses)
            return variance, std_dev, max_min_diff
        return 0, 0, 0

    def _series(self, storage, suffix):
        """Build {metric_key: [values...]} from per-round dicts, dropping Nones."""
        keys = {f"global_{suffix}"}
        keys.update(f"client_{cid}_{suffix}" for cid in self._DEFAULT_CLIENTS)
        for round_data in storage:
            keys.update(k for k in round_data if k != "round")

        result = {"rounds": [r["round"] for r in storage]}
        for key in sorted(keys):
            result[key] = [r[key] for r in storage if r.get(key) is not None]
        return result

    @property
    def train_metrics(self):
        """Retorna métricas de treino no formato para os gráficos."""
        return self._series(self.train_metrics_by_round, "train_loss")

    @property
    def eval_metrics(self):
        """Retorna métricas de avaliação no formato para os gráficos."""
        return self._series(self.eval_metrics_by_round, "eval_loss")
|
fleven/mlflow_utils.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import mlflow
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from typing import Dict, Optional
|
| 4 |
+
import torch
|
| 5 |
+
|
| 6 |
+
"""Utilitários para integração com MLflow."""
|
| 7 |
+
|
| 8 |
+
class MLflowTracker:
    """Manages experiment logging to an MLflow tracking server.

    Every method becomes a no-op when ``enabled`` is False, and all MLflow
    calls are wrapped in try/except so that tracking failures never abort
    the federated run (errors are printed as warnings instead).
    """

    def __init__(self, tracking_uri: str, experiment_name: str, enabled: bool = True):
        """
        Initialize the MLflow tracker.

        Args:
            tracking_uri: URI of the MLflow tracking server
            experiment_name: Name of the experiment
            enabled: If True, enables MLflow logging
        """
        self.enabled = enabled

        if not self.enabled:
            print("[MLflow] Tracking desabilitado")
            # NOTE(review): when disabled, self.experiment is never set —
            # callers must not access it in that case.
            return

        try:
            mlflow.set_tracking_uri(tracking_uri)
            mlflow.set_experiment(experiment_name)
            self.experiment = mlflow.get_experiment_by_name(experiment_name)
            print(f"[MLflow] Conectado ao experimento '{experiment_name}' em {tracking_uri}")
        except Exception as e:
            # Fail soft: disable tracking rather than crashing the server.
            print(f"[MLflow] AVISO: Erro ao conectar: {e}")
            self.enabled = False

    def start_run(self, run_name: str, tags: Optional[Dict] = None) -> Optional[mlflow.ActiveRun]:
        """Start a new MLflow run; returns None when disabled or on error."""
        if not self.enabled:
            return None

        try:
            run = mlflow.start_run(run_name=run_name, tags=tags)
            print(f"[MLflow] Run iniciado: {run_name} (ID: {run.info.run_id})")
            return run
        except Exception as e:
            print(f"[MLflow] AVISO: Erro ao iniciar run: {e}")
            return None

    def end_run(self):
        """Finish the current MLflow run."""
        if not self.enabled:
            return

        try:
            mlflow.end_run()
            print("[MLflow] Run finalizado")
        except Exception as e:
            print(f"[MLflow] AVISO: Erro ao finalizar run: {e}")

    def log_params(self, params: Dict):
        """Log the experiment's parameters."""
        if not self.enabled:
            return

        try:
            mlflow.log_params(params)
            print(f"[MLflow] {len(params)} parâmetros logados")
        except Exception as e:
            print(f"[MLflow] AVISO: Erro ao logar parâmetros: {e}")

    def log_metric(self, key: str, value: float, step: Optional[int] = None):
        """Log a single metric value (optionally at a given step)."""
        if not self.enabled:
            return

        try:
            mlflow.log_metric(key, value, step=step)
        except Exception as e:
            print(f"[MLflow] AVISO: Erro ao logar métrica {key}: {e}")

    def log_metrics(self, metrics: Dict, step: Optional[int] = None):
        """Log multiple metrics at once."""
        if not self.enabled:
            return

        try:
            mlflow.log_metrics(metrics, step=step)
        except Exception as e:
            print(f"[MLflow] AVISO: Erro ao logar métricas: {e}")

    def log_artifact(self, local_path: str):
        """Log a single file as an artifact."""
        if not self.enabled:
            return

        try:
            mlflow.log_artifact(local_path)
            print(f"[MLflow] Artifact logado: {local_path}")
        except Exception as e:
            print(f"[MLflow] AVISO: Erro ao logar artifact: {e}")

    def log_artifacts(self, local_dir: str):
        """Log an entire directory as artifacts."""
        if not self.enabled:
            return

        try:
            mlflow.log_artifacts(local_dir)
            print(f"[MLflow] Artifacts logados do diretório: {local_dir}")
        except Exception as e:
            print(f"[MLflow] AVISO: Erro ao logar artifacts: {e}")

    def log_model(self, model: torch.nn.Module, artifact_path: str = "model"):
        """Log a PyTorch model via mlflow.pytorch."""
        if not self.enabled:
            return

        try:
            mlflow.pytorch.log_model(model, artifact_path)
            print(f"[MLflow] Modelo PyTorch logado em '{artifact_path}'")
        except Exception as e:
            print(f"[MLflow] AVISO: Erro ao logar modelo: {e}")

    def set_tag(self, key: str, value: str):
        """Set a single tag on the current run."""
        if not self.enabled:
            return

        try:
            mlflow.set_tag(key, value)
        except Exception as e:
            print(f"[MLflow] AVISO: Erro ao definir tag: {e}")

    def set_tags(self, tags: Dict[str, str]):
        """Set multiple tags on the current run."""
        if not self.enabled:
            return

        try:
            mlflow.set_tags(tags)
        except Exception as e:
            print(f"[MLflow] AVISO: Erro ao definir tags: {e}")
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def get_mlflow_tracker(context) -> MLflowTracker:
    """
    Build an MLflowTracker from the Flower context.

    Args:
        context: Flower Context holding the run configuration

    Returns:
        An MLflowTracker instance configured from ``context.run_config``
        (mlflow-enable, mlflow-tracking-uri, mlflow-experiment-name).
    """
    cfg = context.run_config
    return MLflowTracker(
        tracking_uri=cfg.get("mlflow-tracking-uri", "http://127.0.0.1:5000"),
        experiment_name=cfg.get("mlflow-experiment-name", "FLEVEn-Experiments"),
        enabled=cfg.get("mlflow-enable", True),
    )
|
fleven/server.py
ADDED
|
@@ -0,0 +1,358 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""ServerApp para aprendizado federado com FLEVEn."""
|
| 2 |
+
import torch
|
| 3 |
+
from typing import Iterable, Optional
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
|
| 7 |
+
from flwr.app import Context, ArrayRecord, MetricRecord
|
| 8 |
+
from flwr.serverapp import ServerApp, Grid
|
| 9 |
+
from flwr.serverapp.strategy import FedAvg, FedAdam, FedYogi, FedAdagrad
|
| 10 |
+
from flwr.common import Message
|
| 11 |
+
|
| 12 |
+
from fleven.utils import set_seed, get_model
|
| 13 |
+
from fleven.collector import MetricsCollector
|
| 14 |
+
from fleven.analysis import create_visualizations, save_detailed_metrics, print_final_summary
|
| 15 |
+
from fleven.mlflow_utils import get_mlflow_tracker # import do fmlfow
|
| 16 |
+
|
| 17 |
+
# Maps the lowercase strategy name from the run config to the
# corresponding Flower strategy class (FedAvg is the fallback in main()).
STRATEGIES = {
    "fedavg": FedAvg,
    "fedadam": FedAdam,
    "fedyogi": FedYogi,
    "fedadagrad": FedAdagrad,
}
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def get_custom_strategy_class(base_strategy_class):
    """Dynamically create a CustomStrategy class inheriting from the given base.

    The returned class wraps ``aggregate_train``/``aggregate_evaluate`` to
    record per-client and global losses into a MetricsCollector and,
    optionally, into MLflow.
    """

    class CustomStrategy(base_strategy_class):
        def __init__(self, collector: MetricsCollector, mlflow_tracker=None, **kwargs):
            super().__init__(**kwargs)
            self.collector = collector
            self.mlflow_tracker = mlflow_tracker  # optional MLflowTracker
            strategy_name = self.__class__.__bases__[0].__name__
            print(f"CustomStrategy (coletando métricas para {strategy_name}) inicializada.")

        def aggregate_train(self, server_round: int, replies: Iterable[Message]) -> tuple[Optional[ArrayRecord], Optional[MetricRecord]]:
            # BUG FIX: materialize the iterable — super() consumes it first,
            # so a single-pass iterator would be exhausted before the
            # per-client loop below ever runs.
            replies = list(replies)
            aggregated_arrays, aggregated_metrics = super().aggregate_train(server_round, replies)

            if aggregated_metrics:
                individual_losses = {}
                for reply in replies:
                    if reply.has_content() and "metrics" in reply.content:
                        metrics = reply.content["metrics"]
                        client_id = int(metrics.get("client_id", 0))
                        train_loss = float(metrics.get("train_loss", 0.0))
                        print(f"  > Detalhe Cliente {client_id}: Perda de Treino = {train_loss:.6f}")
                        individual_losses[f"client_{client_id}_train_loss"] = train_loss

                        # Log the individual client metric to MLflow.
                        if self.mlflow_tracker:
                            self.mlflow_tracker.log_metric(
                                f"client_{client_id}/train_loss",
                                train_loss,
                                step=server_round
                            )

                global_loss = aggregated_metrics.get("train_loss")
                metrics_dict = {"global_train_loss": global_loss}
                metrics_dict.update(individual_losses)
                self.collector.add_train_round(server_round, metrics_dict)

                # Log the global metric to MLflow.
                if self.mlflow_tracker and global_loss is not None:
                    self.mlflow_tracker.log_metric(
                        "global/train_loss",
                        global_loss,
                        step=server_round
                    )

            return aggregated_arrays, aggregated_metrics

        def aggregate_evaluate(self, server_round: int, replies: Iterable[Message]) -> Optional[MetricRecord]:
            # BUG FIX: same materialization as in aggregate_train.
            replies = list(replies)
            aggregated_metrics = super().aggregate_evaluate(server_round, replies)

            if aggregated_metrics:
                individual_losses = {}
                for reply in replies:
                    if reply.has_content() and "metrics" in reply.content:
                        metrics = reply.content["metrics"]
                        client_id = int(metrics.get("client_id", 0))
                        eval_loss = float(metrics.get("eval_loss", 0.0))
                        print(f"  > Detalhe Cliente {client_id}: Perda de Avaliação = {eval_loss:.6f}")
                        individual_losses[f"client_{client_id}_eval_loss"] = eval_loss

                        # Log the individual client metric to MLflow.
                        if self.mlflow_tracker:
                            self.mlflow_tracker.log_metric(
                                f"client_{client_id}/eval_loss",
                                eval_loss,
                                step=server_round
                            )

                global_loss = aggregated_metrics.get("eval_loss")
                metrics_dict = {"global_eval_loss": global_loss}
                metrics_dict.update(individual_losses)
                self.collector.add_eval_round(server_round, metrics_dict)

                # Log the global metric to MLflow.
                if self.mlflow_tracker and global_loss is not None:
                    self.mlflow_tracker.log_metric(
                        "global/eval_loss",
                        global_loss,
                        step=server_round
                    )

            return aggregated_metrics

    return CustomStrategy
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
# Server application entry point.
app = ServerApp()

@app.main()
def main(grid: Grid, context: Context) -> None:
    """Função principal do servidor - lê todas as configurações do Context."""

    # MLflow tracker (may be a disabled no-op depending on run config).
    mlflow_tracker = get_mlflow_tracker(context)

    # Reproducibility.
    seed = int(context.run_config.get("seed", 42))
    set_seed(seed)

    # General federation settings.
    strategy_name = context.run_config.get("strategy", "fedavg").lower()
    num_rounds = int(context.run_config.get("rounds", 5))
    min_nodes = int(context.run_config.get("min-nodes", 3))

    # Model configuration.
    model_type = context.run_config.get("model-type", "lstm")
    input_size = int(context.run_config.get("input-size", 6))
    prediction_length = int(context.run_config.get("prediction-length", 10))
    num_layers = int(context.run_config.get("num-layers", 1))
    sequence_length = int(context.run_config.get("sequence-length", 60))
    target_column = str(context.run_config.get("target-column", "P_kW"))

    # Parameters for the "lstm" and "mlp" models.
    hidden_size = int(context.run_config.get("hidden-size", 32))

    # Parameters for the "lstm_dense" model.
    lstm_hidden_size = int(context.run_config.get("lstm-hidden-size", 32))
    dense_hidden_size = int(context.run_config.get("dense-hidden-size", 16))

    # Dropout parameter for "lstm" and "lstm_dense".
    dropout = float(context.run_config.get("dropout", 0.0))

    # Training configuration.
    batch_size = int(context.run_config.get("batch-size", 32))
    learning_rate = float(context.run_config.get("learning-rate", 1e-5))
    local_epochs = int(context.run_config.get("local-epochs", 1))
    train_test_split = float(context.run_config.get("train-test-split", 0.8))

    # Output path for results; defaults to <project-root>/results.
    results_base_path = context.run_config.get("results-base-path", None)
    if results_base_path:
        output_dir = Path(results_base_path)
    else:
        base_dir = Path(__file__).parent.parent
        output_dir = base_dir / "results"

    output_dir.mkdir(parents=True, exist_ok=True)

    # Start the MLflow run with a timestamped name.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_name = f"{strategy_name}_{model_type}_{timestamp}"

    mlflow_tracker.start_run(
        run_name=run_name,
        tags={
            "strategy": strategy_name,
            "model_type": model_type,
            "target": target_column
        }
    )

    # Log all experiment parameters to MLflow.
    mlflow_tracker.log_params({
        "strategy": strategy_name,
        "num_rounds": num_rounds,
        "min_nodes": min_nodes,
        "model_type": model_type,
        "input_size": input_size,
        "prediction_length": prediction_length,
        "num_layers": num_layers,
        "sequence_length": sequence_length,
        "target_column": target_column,
        "batch_size": batch_size,
        "learning_rate": learning_rate,
        "local_epochs": local_epochs,
        "train_test_split": train_test_split,
        "seed": seed,

        # Model-architecture parameters.
        "hidden_size": hidden_size,
        "lstm_hidden_size": lstm_hidden_size,
        "dense_hidden_size": dense_hidden_size,
        "dropout": dropout
    })

    # Human-readable run banner.
    print(f"\n{'='*60}")
    print(f"SERVIDOR DE APRENDIZADO FEDERADO")
    print(f"{'='*60}")
    print(f"Estratégia: {strategy_name.upper()}")
    print(f"Rodadas: {num_rounds}")
    print(f"Nós mínimos: {min_nodes}")
    print(f"Modelo: {model_type.upper()}")
    print(f"Tamanho da Previsão: {prediction_length}")
    print(f"Tamanho Hidden (lstm/mlp): {hidden_size}")
    print(f"Tamanho LSTM Hidden (lstm_dense): {lstm_hidden_size}")
    print(f"Tamanho Dense Hidden (lstm_dense): {dense_hidden_size}")
    print(f"Número de Camadas do Modelo: {num_layers}")
    print(f"Dropout: {dropout}")
    print(f"Target Column: {target_column}")
    print(f"Resultados serão salvos em: {output_dir.absolute()}")
    print(f"{'='*60}\n")

    # Metrics collector shared with the strategy.
    collector = MetricsCollector(strategy_name)

    # Model configuration dictionary consumed by get_model().
    model_config = {
        "name": model_type,
        "input_size": input_size,
        "output_size": prediction_length,
        "num_layers": num_layers,
        "sequence_length": sequence_length,

        # Parameters for "lstm" and "mlp".
        "hidden_size": hidden_size,

        # Parameters for "lstm_dense".
        "lstm_hidden_size": lstm_hidden_size,
        "dense_hidden_size": dense_hidden_size,

        # Dropout parameter.
        "dropout": dropout
    }

    # Initial global model and its serialized weights.
    net = get_model(model_config)
    initial_arrays = ArrayRecord(net.state_dict())

    # Base parameters shared by every strategy.
    strategy_params = {
        "fraction_train": 1.0,
        "fraction_evaluate": 1.0,
        "min_available_nodes": min_nodes,
        "min_train_nodes": min_nodes,
        "min_evaluate_nodes": min_nodes,
    }

    # Strategy-specific parameters from the run config.
    # NOTE(review): Flower run_config is typically flat — confirm that
    # "strategy-params" actually resolves to a dict in this deployment.
    strategy_specific_params = context.run_config.get("strategy-params", {})

    if strategy_name == "fedadam":
        strategy_params["eta"] = float(strategy_specific_params.get("eta", 0.01))
        strategy_params["beta_1"] = float(strategy_specific_params.get("beta_1", 0.9))
        strategy_params["beta_2"] = float(strategy_specific_params.get("beta_2", 0.999))
        print(f"Carregando FedAdam com: eta={strategy_params['eta']}, beta_1={strategy_params['beta_1']}, beta_2={strategy_params['beta_2']}")

        # Log strategy-specific parameters.
        mlflow_tracker.log_params({
            "eta": strategy_params["eta"],
            "beta_1": strategy_params["beta_1"],
            "beta_2": strategy_params["beta_2"]
        })

    elif strategy_name == "fedadagrad":
        strategy_params["eta"] = float(strategy_specific_params.get("eta_adagrad", 0.1))
        strategy_params["initial_accumulator_value"] = float(strategy_specific_params.get("initial_accumulator_value", 0.1))
        print(f"Carregando FedAdagrad com: eta={strategy_params['eta']}, initial_accumulator_value={strategy_params['initial_accumulator_value']}")

        mlflow_tracker.log_params({
            "eta": strategy_params["eta"],
            "initial_accumulator_value": strategy_params["initial_accumulator_value"]
        })

    elif strategy_name == "fedyogi":
        strategy_params["eta"] = float(strategy_specific_params.get("eta_yogi", 0.01))
        strategy_params["beta_1"] = float(strategy_specific_params.get("beta_1_yogi", 0.9))
        strategy_params["beta_2"] = float(strategy_specific_params.get("beta_2_yogi", 0.999))
        strategy_params["initial_accumulator_value"] = float(strategy_specific_params.get("initial_accumulator_value_yogi", 1e-6))
        print(f"Carregando FedYogi com: eta={strategy_params['eta']}, beta_1={strategy_params['beta_1']}, beta_2={strategy_params['beta_2']}")

        mlflow_tracker.log_params({
            "eta": strategy_params["eta"],
            "beta_1": strategy_params["beta_1"],
            "beta_2": strategy_params["beta_2"],
            "initial_accumulator_value": strategy_params["initial_accumulator_value"]
        })

    # Instantiate the metric-collecting strategy dynamically.
    BaseStrategyClass = STRATEGIES.get(strategy_name, FedAvg)
    CustomStrategyClass = get_custom_strategy_class(BaseStrategyClass)
    strategy = CustomStrategyClass(
        collector=collector,
        mlflow_tracker=mlflow_tracker,  # strategy logs per-round metrics to MLflow
        **strategy_params
    )

    print("Iniciando servidor FL...")

    # Run the federated training loop.
    result = strategy.start(
        grid=grid,
        initial_arrays=initial_arrays,
        num_rounds=num_rounds,
    )

    # Report on (and persist) the final model, when available.
    print("\n" + "="*60)
    if result.arrays:
        print(f"Modelo final obtido com sucesso!")
        total_params = sum(p.numel() for p in result.arrays.to_torch_state_dict().values())
        print(f"Total de parâmetros: {total_params}")

        # Log the final model to MLflow.
        final_model = get_model(model_config)
        final_model.load_state_dict(result.arrays.to_torch_state_dict())
        mlflow_tracker.log_model(final_model, "final_model")

        # Also save the model locally and log it as an artifact.
        model_path = output_dir / "final_model.pt"
        torch.save(final_model.state_dict(), model_path)
        mlflow_tracker.log_artifact(str(model_path))

    print(f"Resultados salvos em: {output_dir.absolute()}")
    print("="*60)

    # Generate analyses and visualizations.
    print("\nTREINAMENTO CONCLUÍDO - GERANDO ANÁLISES")
    print("="*60)

    try:
        create_visualizations(collector, output_dir)
        save_detailed_metrics(collector, output_dir)
        print_final_summary(collector)

        # Log generated artifacts (plots, CSVs, etc.) to MLflow.
        mlflow_tracker.log_artifacts(str(output_dir))

    except Exception as e:
        # Best-effort: a failed visualization must not fail the run.
        print(f"AVISO: Erro ao gerar análises: {e}")
        print("O treinamento foi concluído com sucesso, mas as visualizações não foram geradas.")

    # Close the MLflow run.
    mlflow_tracker.end_run()

    print("\n" + "="*60)
    print("PROCESSAMENTO FINALIZADO")
    print("="*60)
|
| 354 |
+
|
| 355 |
+
|
| 356 |
+
if __name__ == "__main__":
    # Informational only: the server is launched via the Flower CLI.
    for line in (
        "Servidor pronto para ser executado com Flower 1.22.0",
        "Use: flwr run .",
    ):
        print(line)
|
fleven/utils.py
ADDED
|
@@ -0,0 +1,284 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
import random
|
| 4 |
+
import torch
|
| 5 |
+
import torch.nn as nn
|
| 6 |
+
import torch.nn.functional as F
|
| 7 |
+
from torch.utils.data import TensorDataset, DataLoader
|
| 8 |
+
from sklearn.preprocessing import MinMaxScaler
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
def set_seed(seed: int):
    """Seed every RNG in use (stdlib, NumPy, PyTorch) for reproducibility."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    # Seed all CUDA devices as well when a GPU is present.
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
|
| 18 |
+
|
| 19 |
+
# Plain LSTM regressor: LSTM -> Linear on the last time step.
class LSTMNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout=0.0):
        super().__init__()
        # nn.LSTM only applies dropout between stacked layers, so it is
        # meaningless (and warns) when num_layers == 1.
        effective_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=effective_dropout,
        )
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # x: (batch, seq_len, input_size); only the final time step feeds the head.
        seq_out, _ = self.lstm(x)
        return self.linear(seq_out[:, -1, :])
|
| 38 |
+
|
| 39 |
+
# LSTM -> Dropout -> Dense(ReLU) -> Dense(Output)
class LSTMDenseNet(nn.Module):
    def __init__(self, input_size, lstm_hidden_size, dense_hidden_size, output_size, num_layers=1, dropout=0.0):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size,
            lstm_hidden_size,
            num_layers,
            batch_first=True,
            # Inter-layer dropout only applies to stacked LSTMs (num_layers > 1).
            dropout=dropout if num_layers > 1 else 0.0,
        )
        # Dropout applied to the LSTM's last-step output before the dense head.
        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(lstm_hidden_size, dense_hidden_size)
        self.fc2 = nn.Linear(dense_hidden_size, output_size)

    def forward(self, x):
        # seq_out: (batch, seq_len, lstm_hidden_size)
        seq_out, _ = self.lstm(x)
        # Keep only the final time step: (batch, lstm_hidden_size).
        last_step = self.dropout(seq_out[:, -1, :])
        # Dense head with ReLU in between.
        return self.fc2(F.relu(self.fc1(last_step)))
|
| 71 |
+
|
| 72 |
+
# MLP over the flattened input sequence.
class MLPNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # input_size must already equal sequence_length * num_features,
        # since forward() flattens each sequence into a single vector.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Flatten (batch, seq_len, features) -> (batch, seq_len * features).
        flat = x.view(x.shape[0], -1)
        return self.fc2(self.relu(self.fc1(flat)))
|
| 90 |
+
|
| 91 |
+
def create_sliding_windows(data, sequence_length, prediction_length):
    """Build sliding input/target windows for time-series forecasting.

    Each input window is ``sequence_length`` consecutive rows of ``data``;
    its target is the next ``prediction_length`` values of the LAST column
    (the convention is that the target feature is ordered last).
    """
    n_windows = len(data) - sequence_length - prediction_length + 1
    xs = [data[start:start + sequence_length] for start in range(n_windows)]
    ys = [
        data[start + sequence_length:start + sequence_length + prediction_length, -1]
        for start in range(n_windows)
    ]
    return np.array(xs), np.array(ys)
|
| 100 |
+
|
| 101 |
+
def load_data(client_id: int, sequence_length: int, prediction_length: int,
              batch_size: int, train_test_split: float, data_base_path: str = None,
              target_column: str = "P_kW"):
    """
    Load, scale and window one client's CSV data, returning DataLoaders.

    Args:
        client_id: Client identifier; data is read from ``client_{client_id}/``.
        sequence_length: Input window size (time steps).
        prediction_length: Number of future steps to predict.
        batch_size: Batch size for both loaders.
        train_test_split: Fraction of rows used for training (e.g. 0.8 = 80%).
        data_base_path: Optional base directory for the data; falls back to a
            path relative to this file when empty/None.
        target_column: Column used as the prediction target.

    Returns:
        ``(trainloader, testloader, num_features)``.

    Raises:
        FileNotFoundError: If the client directory or its CSVs are missing.
        ValueError: If the target column is invalid or a split ends up empty.
    """
    # Resolve the client's data directory: explicit base path, or repo-relative.
    if data_base_path:
        data_dir = Path(data_base_path) / f"client_{client_id}"
        print(f"[Cliente {client_id}] Usando data_base_path configurado: {data_dir}")
    else:
        data_dir = Path(__file__).parent.parent / "data" / f"client_{client_id}"
        print(f"[Cliente {client_id}] Usando caminho relativo: {data_dir}")

    print(f"[Cliente {client_id}] Procurando dados em: {data_dir.absolute()}")

    if not data_dir.exists():
        raise FileNotFoundError(
            f"Diretório não encontrado para o cliente {client_id}: {data_dir.absolute()}"
        )

    # Every CSV in the directory is treated as one route and concatenated.
    csv_files = list(data_dir.glob("*.csv"))
    if not csv_files:
        raise FileNotFoundError(
            f"Nenhum arquivo CSV encontrado para o cliente {client_id} no diretório {data_dir.absolute()}"
        )

    print(f"[Cliente {client_id}] Encontrados {len(csv_files)} arquivos CSV")
    combined_df = pd.concat((pd.read_csv(path) for path in csv_files), ignore_index=True)

    all_columns = ['vehicle_speed', 'engine_rpm', 'P_kW']
    if target_column not in all_columns:
        raise ValueError(
            f"A coluna alvo '{target_column}' não é uma das colunas válidas: {all_columns}"
        )

    # Reorder so the target is the LAST column; the windowing code indexes it
    # with -1.
    ordered_columns = [col for col in all_columns if col != target_column]
    ordered_columns.append(target_column)
    processed_df = combined_df[ordered_columns].dropna()

    # Chronological split (no shuffling).
    split_index = int(len(processed_df) * train_test_split)
    train_df = processed_df.iloc[:split_index]
    test_df = processed_df.iloc[split_index:]

    # Fit the scaler on training rows only, then apply to both splits to
    # avoid test-set leakage.
    scaler = MinMaxScaler()
    scaler.fit(train_df)
    train_scaled = scaler.transform(train_df)
    test_scaled = scaler.transform(test_df)

    X_train, y_train = create_sliding_windows(train_scaled, sequence_length, prediction_length)
    X_test, y_test = create_sliding_windows(test_scaled, sequence_length, prediction_length)

    if len(X_train) == 0 or len(X_test) == 0:
        raise ValueError(
            f"A divisão de dados para o cliente {client_id} resultou em um conjunto vazio."
        )

    train_dataset = TensorDataset(
        torch.from_numpy(X_train).float(), torch.from_numpy(y_train).float()
    )
    test_dataset = TensorDataset(
        torch.from_numpy(X_test).float(), torch.from_numpy(y_test).float()
    )

    trainloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    testloader = DataLoader(test_dataset, batch_size=batch_size)

    # Feature count = size of the last axis of the window tensor.
    num_features = X_train.shape[2]

    print(f"[Cliente {client_id}] Dados carregados: {len(train_dataset)} treino, {len(test_dataset)} teste")

    return trainloader, testloader, num_features
|
| 193 |
+
|
| 194 |
+
def train(net, trainloader, epochs: int, learning_rate: float,
          max_grad_norm: float, device):
    """Run local training and return the mean MSE loss per sample."""
    loss_fn = torch.nn.MSELoss(reduction="mean")
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
    net.to(device)
    net.train()

    loss_sum = 0.0
    sample_count = 0

    for _ in range(epochs):
        for batch_x, batch_y in trainloader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            optimizer.zero_grad()
            batch_loss = loss_fn(net(batch_x), batch_y)
            batch_loss.backward()
            # Clip gradients before the optimizer step.
            torch.nn.utils.clip_grad_norm_(net.parameters(), max_norm=max_grad_norm)
            optimizer.step()

            # Weight each batch's mean loss by its size so the final average
            # is per-sample even when the last batch is smaller.
            n = batch_x.size(0)
            loss_sum += batch_loss.item() * n
            sample_count += n

    # Empty loader: no samples were seen.
    return loss_sum / sample_count if sample_count else 0.0
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
def test(net, testloader, device):
    """Evaluate ``net`` on ``testloader``; return ``(avg_loss_per_sample, num_examples)``."""
    criterion = torch.nn.MSELoss(reduction="mean")
    net.to(device)
    net.eval()
    total_loss_sum = 0.0
    total_samples = 0
    # Inference only: disable autograd bookkeeping.
    with torch.no_grad():
        for sequences, labels in testloader:
            sequences, labels = sequences.to(device), labels.to(device)
            outputs = net(sequences)
            loss = criterion(outputs, labels)
            # Weight the per-batch mean loss by batch size so the final
            # average stays per-sample even when the last batch is smaller.
            batch_size = sequences.size(0)
            total_loss_sum += loss.item() * batch_size
            total_samples += batch_size

    # Empty loader: report zero loss over zero examples.
    if total_samples == 0:
        return 0.0, 0
    avg_loss = total_loss_sum / total_samples
    return avg_loss, total_samples
|
| 244 |
+
|
| 245 |
+
def get_model(model_config: dict):
    """
    Model factory: return a freshly initialised model based on the config.

    Args:
        model_config: Dict whose "name" key selects the architecture
            ("lstm", "lstm_dense" or "mlp"; default "lstm", case-insensitive),
            plus the parameters each model requires:
            - "lstm": input_size, hidden_size, output_size,
              optional num_layers (default 1) and dropout (default 0.0)
            - "lstm_dense": input_size, lstm_hidden_size, dense_hidden_size,
              output_size, optional num_layers (1) and dropout (0.0)
            - "mlp": sequence_length, input_size, hidden_size, output_size
              (the MLP input is the flattened sequence)

    Raises:
        ValueError: If "name" is not one of the supported model types.
    """
    model_type = model_config.get("name", "lstm").lower()

    if model_type == "lstm":
        # Fix: dropped the useless f-prefixes (no placeholders) on these prints.
        print("Criando modelo LSTMNet (Simples: LSTM -> Linear)...")
        # Original project model, now with dropout support.
        return LSTMNet(
            input_size=model_config["input_size"],
            hidden_size=model_config["hidden_size"],
            output_size=model_config["output_size"],
            num_layers=model_config.get("num_layers", 1),
            dropout=model_config.get("dropout", 0.0),
        )

    if model_type == "lstm_dense":
        print("Criando modelo LSTMDenseNet (Adaptado: LSTM -> Dense -> Linear)...")
        # Model adapted from one of the DACAI notebooks.
        return LSTMDenseNet(
            input_size=model_config["input_size"],
            lstm_hidden_size=model_config["lstm_hidden_size"],
            dense_hidden_size=model_config["dense_hidden_size"],
            output_size=model_config["output_size"],
            num_layers=model_config.get("num_layers", 1),
            dropout=model_config.get("dropout", 0.0),
        )

    if model_type == "mlp":
        print("Criando modelo MLPNet...")
        # The MLP consumes the whole sequence flattened into one vector.
        mlp_input_size = model_config["sequence_length"] * model_config["input_size"]
        return MLPNet(
            input_size=mlp_input_size,
            hidden_size=model_config["hidden_size"],
            output_size=model_config["output_size"],
        )

    raise ValueError(f"Tipo de modelo desconhecido: {model_type}")
|
images/mlflow_print.png
ADDED
|
pyproject.toml
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[build-system]
|
| 2 |
+
requires = ["hatchling"]
|
| 3 |
+
build-backend = "hatchling.build"
|
| 4 |
+
|
| 5 |
+
[project]
|
| 6 |
+
name = "fleven"
|
| 7 |
+
version = "1.0.0"
|
| 8 |
+
description = "FLEVEn — Federated Learning for Vehicular Environment"
|
| 9 |
+
|
| 10 |
+
readme = "README.md"
|
| 11 |
+
keywords = ["federated-learning", "flower", "mlflow", "federated-learning-vehicular", "time-series", "lstm", "pytorch", "obd"]
|
| 12 |
+
classifiers = [
|
| 13 |
+
"Programming Language :: Python :: 3.9",
|
| 14 |
+
"Programming Language :: Python :: 3.10",
|
| 15 |
+
"Programming Language :: Python :: 3.11",
|
| 16 |
+
"License :: OSI Approved :: Apache Software License",
|
| 17 |
+
"Operating System :: OS Independent",
|
| 18 |
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
| 19 |
+
"Topic :: Software Development :: Libraries :: Python Modules"
|
| 20 |
+
]
|
| 21 |
+
authors = [
|
| 22 |
+
{name = "João C. Braz", email = "calbraz@gmail.com"},
|
| 23 |
+
{name = "José Wilson C. Souza", email = "josewilson@matematica.ufrj.br"},
|
| 24 |
+
{name = "Erick de Souza Lima", email = "erickcefetbcc@gmail.com"},
|
| 25 |
+
{name = "Mina", email = "minammonteiro4@gmail.com"},
|
| 26 |
+
]
|
| 27 |
+
|
| 28 |
+
maintainers = [
|
| 29 |
+
{name = "José Wilson C. Souza", email = "josewilson@matematica.ufrj.br"}
|
| 30 |
+
]
|
| 31 |
+
|
| 32 |
+
license = {text = "Apache-2.0"}
|
| 33 |
+
dependencies = [
|
| 34 |
+
"flwr[simulation]>=1.22.0,<2.0",
|
| 35 |
+
"torch>=2.0.0",
|
| 36 |
+
"pandas>=2.0.0",
|
| 37 |
+
"numpy>=1.24.0",
|
| 38 |
+
"scikit-learn>=1.3.0",
|
| 39 |
+
"matplotlib>=3.7.0",
|
| 40 |
+
"seaborn>=0.12.0",
|
| 41 |
+
"toml",
|
| 42 |
+
"mlflow>=2.9.0"
|
| 43 |
+
]
|
| 44 |
+
|
| 45 |
+
[project.urls]
|
| 46 |
+
Repository = "https://github.com/josewilsonsouza/fleven"
|
| 47 |
+
"Issue Tracker" = "https://github.com/josewilsonsouza/fleven/issues"
|
| 48 |
+
|
| 49 |
+
[tool.hatch.build.targets.wheel]
|
| 50 |
+
packages = ["fleven"]
|
| 51 |
+
|
| 52 |
+
[tool.flwr.app]
|
| 53 |
+
publisher = "Lainf_Dmtic_Inmetro"
|
| 54 |
+
|
| 55 |
+
[tool.flwr.app.components]
|
| 56 |
+
serverapp = "fleven.server:app"
|
| 57 |
+
clientapp = "fleven.client:app"
|
| 58 |
+
|
| 59 |
+
[tool.flwr.app.config]
|
| 60 |
+
# Configurações de Federação
|
| 61 |
+
strategy = "fedavg" # Opções: "fedavg", "fedadam", "fedadagrad", "fedyogi"
|
| 62 |
+
rounds = 10
|
| 63 |
+
min-nodes = 3
|
| 64 |
+
seed = 42
|
| 65 |
+
|
| 66 |
+
# 🔧 Caminhos base (opcionais): deixe "" para usar os caminhos relativos padrão do projeto.
|
| 67 |
+
data-base-path = "" #"C:/Users/abece/Documents/fleven-test/data"
|
| 68 |
+
metrics-base-path = "" #"C:/Users/abece/Documents/fleven-test/metrics"
|
| 69 |
+
results-base-path = "" #"C:/Users/abece/Documents/fleven-test/results"
|
| 70 |
+
|
| 71 |
+
# Configurações MLflow
|
| 72 |
+
mlflow-tracking-uri = "http://127.0.0.1:5000" # URI do servidor MLflow
|
| 73 |
+
mlflow-experiment-name = "FLEVEn-Experiments"
|
| 74 |
+
mlflow-enable = true # Habilitar/desabilitar MLflow
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
# Escolha dos modelos do FLEVEn: "lstm", "lstm_dense", "mlp"
|
| 78 |
+
model-type = "lstm_dense"
|
| 79 |
+
|
| 80 |
+
# Parâmetros para "lstm" e "mlp"
|
| 81 |
+
hidden-size = 32
|
| 82 |
+
|
| 83 |
+
# Parâmetros para "lstm_dense" (o novo modelo adaptado)
|
| 84 |
+
lstm-hidden-size = 32 # Tamanho da camada LSTM
|
| 85 |
+
dense-hidden-size = 16 # Tamanho da camada Densa intermediária
|
| 86 |
+
|
| 87 |
+
# Parâmetros para "lstm" e "lstm_dense"
|
| 88 |
+
input-size = 3
|
| 89 |
+
num-layers = 1
|
| 90 |
+
dropout = 0.2 # Taxa de dropout (0.0 para desativar)
|
| 91 |
+
|
| 92 |
+
# Configurações de Séries Temporais
|
| 93 |
+
sequence-length = 100
|
| 94 |
+
prediction-length = 50
|
| 95 |
+
target-column = "vehicle_speed"
|
| 96 |
+
|
| 97 |
+
# Configurações de Treinamento
|
| 98 |
+
batch-size = 32
|
| 99 |
+
learning-rate = 1e-5
|
| 100 |
+
local-epochs = 1
|
| 101 |
+
max-grad-norm = 1.0
|
| 102 |
+
|
| 103 |
+
# Configurações de Dados
|
| 104 |
+
train-test-split = 0.8
|
| 105 |
+
|
| 106 |
+
# Configurações de Checkpoint
|
| 107 |
+
save-checkpoint-every = 5
|
| 108 |
+
|
| 109 |
+
[tool.flwr.app.config.strategy-params]
|
| 110 |
+
# Parâmetros para FedAdam
|
| 111 |
+
eta = 0.01
|
| 112 |
+
beta_1 = 0.9
|
| 113 |
+
beta_2 = 0.999
|
| 114 |
+
|
| 115 |
+
# Parâmetros para FedAdagrad
|
| 116 |
+
eta_adagrad = 0.1
|
| 117 |
+
initial_accumulator_value = 0.1
|
| 118 |
+
|
| 119 |
+
# Parâmetros para FedYogi
|
| 120 |
+
eta_yogi = 0.01
|
| 121 |
+
beta_1_yogi = 0.9
|
| 122 |
+
beta_2_yogi = 0.999
|
| 123 |
+
initial_accumulator_value_yogi = 1e-6
|
| 124 |
+
|
| 125 |
+
[tool.flwr.federations]
|
| 126 |
+
default = "local-simulation"
|
| 127 |
+
|
| 128 |
+
[tool.flwr.federations.local-simulation]
|
| 129 |
+
options.num-supernodes = 3
|
| 130 |
+
options.backend.client-resources.num-cpus = 2
|
| 131 |
+
options.backend.client-resources.num-gpus = 0.0
|
| 132 |
+
|
| 133 |
+
# essas configs vão ser importantes mais na frente, quando quisermos,
|
| 134 |
+
# por exemplo, outros parametros para certos clients
|
| 135 |
+
#[[tool.flwr.federations.local-simulation.options.supernode.resources]]
|
| 136 |
+
#node-config.partition-id = 1
|
| 137 |
+
#node-config.num-partitions = 3
|
| 138 |
+
|
| 139 |
+
#[[tool.flwr.federations.local-simulation.options.supernode.resources]]
|
| 140 |
+
#node-config.partition-id = 2
|
| 141 |
+
#node-config.num-partitions = 3
|
| 142 |
+
|
| 143 |
+
#[[tool.flwr.federations.local-simulation.options.supernode.resources]]
|
| 144 |
+
#node-config.partition-id = 3
|
| 145 |
+
#node-config.num-partitions = 3
|
| 146 |
+
|
| 147 |
+
[tool.flwr.federations.fleven-deployment]
|
| 148 |
+
address = "127.0.0.1:9093"
|
| 149 |
+
insecure = true
|