José Wilson committed on
Commit
44714bc
·
1 Parent(s): f3fd845

update fleven-server

Browse files
.gitignore ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Python
__pycache__/
*.pyc
venv/

# Flower
.flwr/
*.fab

# Results (experiment outputs, MLflow artifacts, raw data)
results/
metrics/
mlartifacts/
mlruns/
data/

*.pt
*.pth

# Jupyter Notebooks
*.ipynb_checkpoints
DataClientCreate.ipynb
Dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Slim Python base image.
FROM python:3.11-slim

# Working directory inside the container.
WORKDIR /app

# Copy the whole project (the 'fleven' package, pyproject.toml, etc.)
# into /app inside the container.
COPY . .

# Install the project dependencies declared in pyproject.toml;
# "." installs the project in the current directory.
RUN pip install --no-cache-dir .

# Port the Flower process listens on inside the container.
# HF Spaces defaults to 7860, but 8080 (Flower's default) is used here;
# HF redirects external traffic to this internal port.
EXPOSE 8080

# Start the Flower process (not `flwr run .`, which is for simulation).
# NOTE(review): `flower-supernode` is Flower's CLIENT-side component, yet the
# intent stated here is to start the SERVER — the server side is normally
# started with `flower-superlink`.  Also verify that `--app` and `--address`
# are valid flags for the Flower version pinned in pyproject.toml; this
# command line looks inconsistent with the Flower 1.x deployment CLI.
CMD ["flower-supernode", \
"--app", "fleven.server:app", \
"--address", "0.0.0.0:8080"]
estrutura.txt ADDED
Binary file (3.71 kB). View file
 
fleven/__init__.py ADDED
File without changes
fleven/analysis.py ADDED
@@ -0,0 +1,354 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Funções para análise e visualização de resultados."""
2
+ import matplotlib
3
+ matplotlib.use("Agg")
4
+
5
+ import pandas as pd
6
+ import numpy as np
7
+ import matplotlib.pyplot as plt
8
+ import seaborn as sns
9
+ from pathlib import Path
10
+ import json
11
+ from datetime import datetime
12
+ from fleven.collector import MetricsCollector
13
+
14
+ plt.style.use('seaborn-v0_8-darkgrid')
15
+ sns.set_palette("husl")
16
+
17
+
18
def create_visualizations(collector: MetricsCollector, output_dir: Path):
    """Create the main 2x2 performance figure plus auxiliary plots.

    Generates global train/eval loss, per-client train loss, per-client
    eval loss, and round-over-round improvement panels, then delegates to
    the convergence and heatmap helpers.  All figures are saved as PDF
    files under ``output_dir``.

    Args:
        collector: Aggregated metrics of the federated run.
        output_dir: Target directory for the generated PDF files.
    """

    client_ids = collector.active_client_ids
    print(f"Analisando clientes com IDs: {client_ids}")

    # One color per client, sized dynamically.  BUGFIX: this palette used to
    # be overwritten by a hard-coded 3-color list, which raised IndexError
    # whenever more than three clients participated.
    colors = sns.color_palette("husl", n_colors=len(client_ids))
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    # Subplot 1: global model performance (train vs. validation loss).
    ax1 = axes[0, 0]
    rounds = collector.train_metrics["rounds"]
    if len(rounds) > 0 and len(collector.train_metrics["global_train_loss"]) > 0:
        ax1.plot(rounds, collector.train_metrics["global_train_loss"],
                 'b-', marker='s', label='Treino Global', linewidth=2)
    if len(collector.eval_metrics["rounds"]) > 0 and len(collector.eval_metrics["global_eval_loss"]) > 0:
        ax1.plot(collector.eval_metrics["rounds"], collector.eval_metrics["global_eval_loss"],
                 'r-', marker='o', label='Validação Global', linewidth=2)
    ax1.set_title('Desempenho do Modelo Global', fontsize=14, fontweight='bold')
    ax1.set_xlabel('Rodada')
    ax1.set_ylabel('Perda (MSE)')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # Subplot 2: training loss per client.
    ax2 = axes[0, 1]
    for idx, client_id in enumerate(client_ids):
        key = f"client_{client_id}_train_loss"
        if key in collector.train_metrics and collector.train_metrics[key]:
            client_losses = collector.train_metrics[key]
            if len(client_losses) > 0:
                # A client's series may be shorter than the round list
                # (clients can skip rounds); only plot when lengths line up.
                client_rounds = [r for r_idx, r in enumerate(rounds) if r_idx < len(client_losses)]
                if len(client_rounds) == len(client_losses):
                    ax2.plot(client_rounds, client_losses,
                             marker='o', label=f'Cliente {client_id}', color=colors[idx], linewidth=1.5)

    if len(rounds) > 0 and len(collector.train_metrics["global_train_loss"]) > 0:
        ax2.plot(rounds, collector.train_metrics["global_train_loss"],
                 'k--', label='Média Global', linewidth=2, alpha=0.7)
    ax2.set_title('Perda de Treinamento por Cliente', fontsize=14, fontweight='bold')
    ax2.set_xlabel('Rodada')
    ax2.set_ylabel('Perda de Treino (MSE)')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    # Subplot 3: validation loss per client.
    ax3 = axes[1, 0]
    eval_rounds = collector.eval_metrics["rounds"]

    for idx, client_id in enumerate(client_ids):
        key = f"client_{client_id}_eval_loss"
        if key in collector.eval_metrics and collector.eval_metrics[key]:
            client_losses = collector.eval_metrics[key]
            if len(client_losses) > 0:
                client_rounds = [r for r_idx, r in enumerate(eval_rounds) if r_idx < len(client_losses)]
                if len(client_rounds) == len(client_losses):
                    ax3.plot(client_rounds, client_losses,
                             marker='s', label=f'Cliente {client_id}', color=colors[idx], linewidth=1.5)

    if len(eval_rounds) > 0 and len(collector.eval_metrics["global_eval_loss"]) > 0:
        ax3.plot(eval_rounds, collector.eval_metrics["global_eval_loss"],
                 'k--', label='Média Global', linewidth=2, alpha=0.7)
    ax3.set_title('Perda de Validação por Cliente', fontsize=14, fontweight='bold')
    ax3.set_xlabel('Rodada')
    ax3.set_ylabel('Perda de Validação (MSE)')
    ax3.legend()
    ax3.grid(True, alpha=0.3)

    # Subplot 4: round-over-round change (delta) of the global losses.
    # NOTE(review): assumes the loss series has one entry per round so that
    # np.diff aligns with rounds[1:] — confirm when clients may skip rounds.
    ax4 = axes[1, 1]
    if len(rounds) > 1 and len(collector.train_metrics["global_train_loss"]) > 1:
        train_improvement = np.diff(collector.train_metrics["global_train_loss"])
        ax4.plot(rounds[1:], train_improvement, 'g-', marker='v', label='Δ Treino', linewidth=1.5)

    if len(eval_rounds) > 1 and len(collector.eval_metrics["global_eval_loss"]) > 1:
        eval_improvement = np.diff(collector.eval_metrics["global_eval_loss"])
        ax4.plot(eval_rounds[1:], eval_improvement,
                 'm-', marker='^', label='Δ Validação', linewidth=1.5)

    ax4.axhline(y=0, color='k', linestyle='--', alpha=0.5)
    ax4.set_title('Taxa de Melhoria (Δ Perda)', fontsize=14, fontweight='bold')
    ax4.set_xlabel('Rodada')
    ax4.set_ylabel('Mudança na Perda')
    ax4.legend()
    ax4.grid(True, alpha=0.3)

    plt.suptitle(f'Análise de Desempenho - Estratégia: {collector.strategy_name.upper()}',
                 fontsize=16, fontweight='bold')
    plt.tight_layout()
    plt.savefig(output_dir / f'performance_analysis_{collector.strategy_name}.pdf', dpi=300, bbox_inches='tight')
    plt.close()

    # Convergence plot (also fills collector.convergence_metrics).
    _create_convergence_plot(collector, output_dir, eval_rounds)

    # Heatmap of per-client evaluation loss.
    _create_heatmap(collector, output_dir, eval_rounds)

    print(f"Visualizações salvas em {output_dir}")
120
+
121
+
122
def _create_convergence_plot(collector: MetricsCollector, output_dir: Path, eval_rounds):
    """Plot inter-client convergence metrics (variance, std, max-min gap).

    Side effect: appends one entry per evaluated round to
    ``collector.convergence_metrics`` while scanning — calling this twice on
    the same collector would duplicate those entries.
    NOTE(review): ``eval_rounds`` is assumed to be the ordered list of
    evaluated round numbers, as passed by ``create_visualizations``.
    """
    client_ids = collector.active_client_ids
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))

    # Collect each round's per-client eval losses and derive dispersion stats.
    for round_idx, round_num in enumerate(eval_rounds):
        client_losses = []
        for client_id in client_ids:
            key = f"client_{client_id}_eval_loss"
            if key in collector.eval_metrics and round_idx < len(collector.eval_metrics[key]):
                client_losses.append(collector.eval_metrics[key][round_idx])

        # Dispersion is only meaningful with at least two reporting clients.
        if len(client_losses) > 1:
            var, std, diff = collector.calculate_convergence_metrics(client_losses)
            collector.convergence_metrics["rounds"].append(round_num)
            collector.convergence_metrics["loss_variance"].append(var)
            collector.convergence_metrics["loss_std"].append(std)
            collector.convergence_metrics["max_min_diff"].append(diff)

    if len(collector.convergence_metrics["rounds"]) > 0:
        # Panel 1: variance of the eval loss across clients.
        ax1 = axes[0]
        ax1.plot(collector.convergence_metrics["rounds"],
                 collector.convergence_metrics["loss_variance"],
                 'b-', marker='o', linewidth=2)
        ax1.fill_between(collector.convergence_metrics["rounds"],
                         collector.convergence_metrics["loss_variance"],
                         alpha=0.3)
        ax1.set_title('Variância da Perda entre Clientes', fontsize=14, fontweight='bold')
        ax1.set_xlabel('Rodada')
        ax1.set_ylabel('Variância')
        ax1.grid(True, alpha=0.3)

        # Panel 2: standard deviation across clients.
        ax2 = axes[1]
        ax2.plot(collector.convergence_metrics["rounds"],
                 collector.convergence_metrics["loss_std"],
                 'g-', marker='s', linewidth=2)
        ax2.fill_between(collector.convergence_metrics["rounds"],
                         collector.convergence_metrics["loss_std"],
                         alpha=0.3, color='green')
        ax2.set_title('Desvio Padrão da Perda entre Clientes', fontsize=14, fontweight='bold')
        ax2.set_xlabel('Rodada')
        ax2.set_ylabel('Desvio Padrão')
        ax2.grid(True, alpha=0.3)

        # Panel 3: gap between the best and worst client.
        ax3 = axes[2]
        ax3.plot(collector.convergence_metrics["rounds"],
                 collector.convergence_metrics["max_min_diff"],
                 'r-', marker='^', linewidth=2)
        ax3.fill_between(collector.convergence_metrics["rounds"],
                         collector.convergence_metrics["max_min_diff"],
                         alpha=0.3, color='red')
        ax3.set_title('Diferença Máx-Mín entre Clientes', fontsize=14, fontweight='bold')
        ax3.set_xlabel('Rodada')
        ax3.set_ylabel('Diferença')
        ax3.grid(True, alpha=0.3)
    else:
        # No usable data: annotate all three panels instead of plotting.
        for ax in axes:
            ax.text(0.5, 0.5, 'Dados insuficientes',
                    ha='center', va='center', transform=ax.transAxes)

    plt.suptitle(f'Análise de Convergência e Heterogeneidade - {collector.strategy_name.upper()}',
                 fontsize=16, fontweight='bold')
    plt.tight_layout()
    plt.savefig(output_dir / f'convergence_analysis_{collector.strategy_name}.pdf', dpi=300, bbox_inches='tight')
    plt.close()
187
+
188
+
189
def _create_heatmap(collector: MetricsCollector, output_dir: Path, eval_rounds):
    """Render a clients x rounds heatmap of per-client evaluation loss.

    Missing values (clients that skipped rounds) are padded with NaN and
    masked so they render as blank cells.
    """
    client_ids = collector.active_client_ids
    fig, ax = plt.subplots(figsize=(12, 6))

    heatmap_data = []
    max_rounds = len(eval_rounds) if eval_rounds else 0

    # Check whether at least one client reported any evaluation loss.
    has_data = False
    for client_id in client_ids:
        key = f"client_{client_id}_eval_loss"
        if key in collector.eval_metrics and collector.eval_metrics[key]:
            has_data = True
            break

    if has_data and max_rounds > 0:
        # Build one row per client, padded/truncated to max_rounds columns.
        for client_id in client_ids:
            key = f"client_{client_id}_eval_loss"
            if key in collector.eval_metrics and collector.eval_metrics[key]:
                client_data = list(collector.eval_metrics[key])
                while len(client_data) < max_rounds:
                    client_data.append(np.nan)
                heatmap_data.append(client_data[:max_rounds])
            else:
                heatmap_data.append([np.nan] * max_rounds)

        # Mask NaNs so missing cells are not colored by imshow.
        heatmap_array = np.array(heatmap_data, dtype=float)
        masked_array = np.ma.masked_invalid(heatmap_array)

        im = ax.imshow(masked_array, aspect='auto', cmap='RdYlGn_r')

        ax.set_xticks(range(max_rounds))
        ax.set_xticklabels(eval_rounds[:max_rounds])
        ax.set_yticks(range(len(client_ids)))
        ax.set_yticklabels([f'Cliente {cid}' for cid in client_ids])

        ax.set_xlabel('Rodada', fontsize=12)
        ax.set_title(f'Mapa de Calor - Perda de Validação por Cliente - {collector.strategy_name.upper()}',
                     fontsize=14, fontweight='bold')

        # Annotate each valid cell with its numeric loss value.
        for i in range(len(heatmap_data)):
            for j in range(min(len(heatmap_data[i]), max_rounds)):
                if not np.isnan(heatmap_data[i][j]):
                    text = ax.text(j, i, f'{heatmap_data[i][j]:.4f}',
                                   ha="center", va="center", color="black", fontsize=8)

        plt.colorbar(im, ax=ax, label='Perda (MSE)')
    else:
        # Not enough data: render an explanatory placeholder instead.
        ax.text(0.5, 0.5, 'Dados insuficientes para gerar heatmap',
                ha='center', va='center', transform=ax.transAxes, fontsize=12)
        ax.set_title(f'Mapa de Calor - Perda de Validação por Cliente - {collector.strategy_name.upper()}',
                     fontsize=14, fontweight='bold')

    plt.tight_layout()
    plt.savefig(output_dir / f'heatmap_performance_{collector.strategy_name}.pdf', dpi=300, bbox_inches='tight')
    plt.close()
245
+
246
+
247
def save_detailed_metrics(collector: MetricsCollector, output_dir: Path):
    """Persist collected metrics in multiple formats.

    Writes per-round train/eval metrics as CSV files, aggregate statistics
    as JSON, and a human-readable text summary, all under ``output_dir``.
    """

    def _last(values):
        # Final value of a series as float, or None when the series is empty.
        return float(values[-1]) if values else None

    def _delta(values):
        # Total improvement (first - last); 0 when fewer than two points.
        return float(values[0] - values[-1]) if len(values) > 1 else 0

    # Dump the raw per-round records for both phases.
    phase_specs = [
        (collector.train_metrics_by_round, 'train', 'train_metrics', 'treino'),
        (collector.eval_metrics_by_round, 'eval', 'eval_metrics', 'avaliação'),
    ]
    for records, phase, prefix, label in phase_specs:
        if not records:
            continue
        frame = pd.DataFrame(records)
        frame['phase'] = phase
        csv_path = output_dir / f'{prefix}_{collector.strategy_name}.csv'
        frame.to_csv(csv_path, index=False)
        print(f"Métricas de {label} salvas em {csv_path}")

    # Aggregate statistics for the JSON report.
    train_losses = collector.train_metrics["global_train_loss"]
    eval_losses = collector.eval_metrics["global_eval_loss"]
    stats = {
        "strategy": collector.strategy_name,
        "total_rounds": len(collector.train_metrics["rounds"]),
        "final_global_train_loss": _last(train_losses),
        "final_global_eval_loss": _last(eval_losses),
        "train_improvement": _delta(train_losses),
        "eval_improvement": _delta(eval_losses),
        "timestamp": datetime.now().isoformat()
    }

    conv = collector.convergence_metrics
    if conv["rounds"]:
        stats["convergence_metrics"] = {
            "rounds": conv["rounds"],
            "final_variance": _last(conv["loss_variance"]),
            "final_std": _last(conv["loss_std"]),
            "final_max_min_diff": _last(conv["max_min_diff"]),
        }

    # Write the JSON report.
    json_file = output_dir / f'analysis_{collector.strategy_name}.json'
    with open(json_file, 'w') as f:
        json.dump(stats, f, indent=2)
    print(f"Análise estatística salva em {json_file}")

    # Write the plain-text summary.
    _save_summary_text(collector, stats, output_dir)
292
+
293
+
294
+ def _save_summary_text(collector: MetricsCollector, stats: dict, output_dir: Path):
295
+ """Salva sumário em formato texto."""
296
+ summary_file = output_dir / f'summary_{collector.strategy_name}.txt'
297
+ with open(summary_file, 'w', encoding='utf-8') as f:
298
+ f.write("="*60 + "\n")
299
+ f.write(f"RELATÓRIO DE TREINAMENTO - {collector.strategy_name.upper()}\n")
300
+ f.write("="*60 + "\n\n")
301
+
302
+ f.write(f"Data: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
303
+ f.write(f"Total de rodadas: {stats['total_rounds']}\n\n")
304
+
305
+ if stats['final_global_train_loss']:
306
+ f.write("--- MÉTRICAS DE TREINO ---\n")
307
+ f.write(f"Loss inicial: {collector.train_metrics['global_train_loss'][0]:.6f}\n")
308
+ f.write(f"Loss final: {stats['final_global_train_loss']:.6f}\n")
309
+ f.write(f"Melhoria: {stats['train_improvement']:.6f} ({(stats['train_improvement']/collector.train_metrics['global_train_loss'][0]*100):.2f}%)\n\n")
310
+
311
+ if stats['final_global_eval_loss']:
312
+ f.write("--- MÉTRICAS DE AVALIAÇÃO ---\n")
313
+ f.write(f"Loss inicial: {collector.eval_metrics['global_eval_loss'][0]:.6f}\n")
314
+ f.write(f"Loss final: {stats['final_global_eval_loss']:.6f}\n")
315
+ f.write(f"Melhoria: {stats['eval_improvement']:.6f} ({(stats['eval_improvement']/collector.eval_metrics['global_eval_loss'][0]*100):.2f}%)\n\n")
316
+
317
+ if 'convergence_metrics' in stats:
318
+ f.write("--- MÉTRICAS DE CONVERGÊNCIA ---\n")
319
+ f.write(f"Desvio padrão final: {stats['convergence_metrics']['final_std']:.6f}\n")
320
+ f.write(f"Variância final: {stats['convergence_metrics']['final_variance']:.6f}\n")
321
+ f.write(f"Diferença máx-mín final: {stats['convergence_metrics']['final_max_min_diff']:.6f}\n")
322
+
323
+ print(f"Sumário salvo em {summary_file}")
324
+
325
+
326
def print_final_summary(collector: MetricsCollector):
    """Print the final training summary to the console."""
    print("\n" + "="*60)
    print("RESUMO DO TREINAMENTO")
    print("="*60)

    # Global training loss: first/last values and relative improvement.
    train_losses = collector.train_metrics["global_train_loss"]
    if train_losses:
        first, last = train_losses[0], train_losses[-1]
        gain = (first - last) / first * 100
        print(f"Perda inicial de treino: {first:.6f}")
        print(f"Perda final de treino: {last:.6f}")
        print(f"Melhoria no treino: {gain:.2f}%")

    # Global validation loss: same treatment.
    eval_losses = collector.eval_metrics["global_eval_loss"]
    if eval_losses:
        first, last = eval_losses[0], eval_losses[-1]
        gain = (first - last) / first * 100
        print(f"\nPerda inicial de validação: {first:.6f}")
        print(f"Perda final de validação: {last:.6f}")
        print(f"Melhoria na validação: {gain:.2f}%")

    # Convergence quality, judged by the final inter-client std deviation.
    std_series = collector.convergence_metrics["loss_std"]
    if std_series:
        final_std = std_series[-1]
        print(f"\nDesvio padrão final entre clientes: {final_std:.6f}")
        if final_std < 0.01:
            verdict = 'Boa'
        elif final_std < 0.05:
            verdict = 'Moderada'
        else:
            verdict = 'Baixa'
        print(f"Convergência: {verdict}")
fleven/client.py ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from flwr.app import Context, Message, ArrayRecord, MetricRecord, RecordDict
3
+ from flwr.clientapp import ClientApp
4
+ from fleven.utils import get_model, load_data, train, test, set_seed
5
+ from pathlib import Path
6
+ import json
7
+ from datetime import datetime
8
+
9
+ # Verifica se a GPU está disponível
10
+ DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
11
+
12
class MetricsTracker:
    """Tracks and persists a single client's local metrics and checkpoints."""

    def __init__(self, client_id, metrics_base_path=None):
        """
        Args:
            client_id: Numeric identifier of this client.
            metrics_base_path: Optional base directory for metric files; when
                omitted, a ``metrics/`` folder next to the package is used.
        """
        self.client_id = client_id

        # Resolve the metrics directory robustly.
        if metrics_base_path:
            # Use the configured base path.
            self.metrics_dir = Path(metrics_base_path) / f"client_{self.client_id}"
        else:
            # Fall back to a path relative to this source file.
            base_dir = Path(__file__).parent.parent
            self.metrics_dir = base_dir / "metrics" / f"client_{self.client_id}"

        # Create the directory if needed and load any previous history.
        self.metrics_dir.mkdir(parents=True, exist_ok=True)
        self.history_file = self.metrics_dir / "metrics_history.json"
        self.history = self.load_history()

        print(f"[Cliente {self.client_id}] Métricas serão salvas em: {self.metrics_dir.absolute()}")

    def load_history(self):
        """Load the persisted history from disk, or return an empty one."""
        if self.history_file.exists():
            with open(self.history_file, 'r') as f:
                return json.load(f)
        return {"train": [], "eval": []}

    def get_next_round_number(self):
        """Return the next round number based on the stored train history."""
        if not self.history["train"]:
            return 1
        return self.history["train"][-1]["round"] + 1

    def _record(self, split, round_num, loss):
        """Insert or update the loss entry for ``round_num`` in ``split``.

        ``split`` is "train" or "eval".  A single helper replaces the
        previously duplicated insert/update logic of the two add_* methods.
        """
        for entry in self.history[split]:
            if entry["round"] == round_num:
                entry["loss"] = loss
                entry["timestamp"] = datetime.now().isoformat()
                return
        self.history[split].append({
            "round": round_num,
            "loss": loss,
            "timestamp": datetime.now().isoformat()
        })

    def add_train_metrics(self, round_num, loss):
        """Record the training loss for a round (idempotent per round)."""
        self._record("train", round_num, loss)

    def add_eval_metrics(self, round_num, loss):
        """Record the evaluation loss for a round (idempotent per round)."""
        self._record("eval", round_num, loss)

    def save_metrics(self):
        """Persist the in-memory history to JSON (best effort, never raises)."""
        try:
            with open(self.history_file, 'w') as f:
                json.dump(self.history, f, indent=4)
            print(f"[Cliente {self.client_id}] Métricas salvas com sucesso")
        except Exception as e:
            print(f"[Cliente {self.client_id}] AVISO: Não foi possível salvar métricas: {e}")

    def save_checkpoint(self, net, round_num):
        """Save the model's state_dict for this round (best effort, never raises)."""
        try:
            model_path = self.metrics_dir / f"model_round_{round_num}.pt"
            torch.save(net.state_dict(), model_path)
            print(f"[Cliente {self.client_id}] Checkpoint salvo: {model_path}")
        except Exception as e:
            print(f"[Cliente {self.client_id}] AVISO: Não foi possível salvar checkpoint: {e}")
91
+
92
# Module-level cache so data and model are loaded once per client instead of
# on every round; the ClientApp handlers below read from this state.
model_state = {
    "net": None,
    "trainloader": None,
    "testloader": None,
    "metrics_tracker": None,
    "client_id": None,
}

def initialize_client_state(client_id: int, context: Context):
    """Populate ``model_state`` for this client from the run config.

    Loads the client's data partition, builds the model and the metrics
    tracker.  No-op when the cached state already belongs to ``client_id``.
    """
    if model_state["client_id"] == client_id:
        return

    global_seed = int(context.run_config.get("seed", 42))
    # Per-client seed so each client's run is individually reproducible.
    client_seed = global_seed + client_id
    set_seed(client_seed)
    print(f"[Cliente {client_id}] Usando seed local para replicação: {client_seed}")

    print(f"Ator [Node {context.node_id}] a inicializar para Client ID: {client_id}")

    # Data/windowing options from the run config.
    sequence_length = int(context.run_config.get("sequence-length", 60))
    prediction_length = int(context.run_config.get("prediction-length", 10))
    batch_size = int(context.run_config.get("batch-size", 32))
    train_test_split = float(context.run_config.get("train-test-split", 0.8))

    target_column = str(context.run_config.get("target-column", "P_kW"))
    print(f"[Cliente {client_id}] Configurado para prever a coluna: '{target_column}'")

    model_type = context.run_config.get("model-type", "lstm")
    num_layers = int(context.run_config.get("num-layers", 1))

    # Parameters for the "lstm" and "mlp" model types.
    hidden_size = int(context.run_config.get("hidden-size", 32))

    # Parameters for the "lstm_dense" model type.
    lstm_hidden_size = int(context.run_config.get("lstm-hidden-size", 32))
    dense_hidden_size = int(context.run_config.get("dense-hidden-size", 16))

    # Dropout used by "lstm" and "lstm_dense".
    dropout = float(context.run_config.get("dropout", 0.0))

    # Optional overrides for the data and metrics locations.
    data_base_path = context.run_config.get("data-base-path", None)
    metrics_base_path = context.run_config.get("metrics-base-path", None)

    # Load this client's partition; also returns the feature count.
    trainloader, testloader, num_features = load_data(
        client_id,
        sequence_length,
        prediction_length,
        batch_size,
        train_test_split,
        data_base_path=data_base_path,
        target_column=target_column
    )

    # Model configuration consumed by fleven.utils.get_model.
    model_config = {
        "name": model_type,
        "input_size": num_features,  # reported by load_data
        "output_size": prediction_length,
        "num_layers": num_layers,
        "sequence_length": sequence_length,

        # Parameters for "lstm" and "mlp"
        "hidden_size": hidden_size,

        # Parameters for "lstm_dense"
        "lstm_hidden_size": lstm_hidden_size,
        "dense_hidden_size": dense_hidden_size,

        # Dropout parameter
        "dropout": dropout
    }

    # Build the network and move it to the selected device.
    net = get_model(model_config).to(DEVICE)

    model_state.update({
        "net": net,
        "trainloader": trainloader,
        "testloader": testloader,
        "metrics_tracker": MetricsTracker(client_id, metrics_base_path),
        "client_id": client_id,
    })
    print(f"[Cliente {client_id}] inicializado com {len(trainloader.dataset)} amostras de treino.")
183
+
184
# Flower client application; the train/evaluate handlers below are
# registered on it via decorators.
app = ClientApp()
186
+
187
@app.train()
def train_fn(msg: Message, context: Context) -> Message:
    """Train the model locally for one federated round.

    Loads the global weights from the incoming message, runs the configured
    local epochs, persists metrics/checkpoints, and replies with the updated
    weights plus training metrics.
    """
    # The Flower partition-id is used directly as the client id.
    partition_id = int(context.node_config["partition-id"])
    client_id = partition_id

    print(f"[DEBUG] Train - partition-id={partition_id} -> client_id={client_id}")

    initialize_client_state(client_id, context)

    net = model_state["net"]
    trainloader = model_state["trainloader"]
    metrics_tracker = model_state["metrics_tracker"]

    # Round number comes from the server-sent config (0 when absent).
    round_num = int(msg.content["config"].get("server-round", 0))

    print(f"\n[Node {context.node_id}, Cliente {client_id}] === Ronda de Treino {round_num} ===")

    # Install the global weights received from the server.
    arrays = msg.content["arrays"]
    net.load_state_dict(arrays.to_torch_state_dict())

    # Local training hyperparameters from the run config.
    local_epochs = int(context.run_config.get("local-epochs", 1))
    learning_rate = float(context.run_config.get("learning-rate", 1e-5))
    max_grad_norm = float(context.run_config.get("max-grad-norm", 1.0))
    save_checkpoint_every = int(context.run_config.get("save-checkpoint-every", 5))

    avg_train_loss = train(
        net,
        trainloader,
        epochs=local_epochs,
        learning_rate=learning_rate,
        max_grad_norm=max_grad_norm,
        device=DEVICE
    )
    print(f"[Node {context.node_id}, Cliente {client_id}] Perda de treino: {avg_train_loss:.6f}")

    # Persist local metrics; checkpoint every N rounds.
    metrics_tracker.add_train_metrics(round_num, avg_train_loss)
    if round_num % save_checkpoint_every == 0:
        metrics_tracker.save_checkpoint(net, round_num)
    metrics_tracker.save_metrics()

    # Reply with the updated weights and this round's training metrics.
    model_record = ArrayRecord(net.state_dict())
    metrics = MetricRecord({
        "train_loss": avg_train_loss,
        "num-examples": len(trainloader.dataset),
        "client_id": client_id,
    })

    content = RecordDict({"arrays": model_record, "metrics": metrics})
    return Message(content=content, reply_to=msg)
241
+
242
@app.evaluate()
def evaluate_fn(msg: Message, context: Context) -> Message:
    """Evaluate the received global model on the client's local test split."""

    # The Flower partition-id is used directly as the client id.
    partition_id = int(context.node_config["partition-id"])
    client_id = partition_id

    print(f"[DEBUG] Evaluate - partition-id={partition_id} -> client_id={client_id}")

    initialize_client_state(client_id, context)

    net = model_state["net"]
    testloader = model_state["testloader"]
    metrics_tracker = model_state["metrics_tracker"]

    # NOTE(review): when no training history exists yet, the round defaults
    # to 1 instead of reading "server-round" from the message — confirm this
    # asymmetry is intentional; both branches could read the config value.
    if metrics_tracker.history["train"]:
        round_num = int(msg.content["config"].get("server-round", 0))
    else:
        round_num = 1

    # Install the global weights received from the server.
    arrays = msg.content["arrays"]
    net.load_state_dict(arrays.to_torch_state_dict())

    loss, num_examples = test(net, testloader, device=DEVICE)
    print(f"[Node {context.node_id}, Cliente {client_id}] Perda de validação: {loss:.6f}")

    # Persist the evaluation result locally.
    metrics_tracker.add_eval_metrics(round_num, loss)
    metrics_tracker.save_metrics()

    # Reply with evaluation metrics only (no weights).
    metrics = MetricRecord({
        "eval_loss": loss,
        "num-examples": num_examples,
        "client_id": client_id,
    })

    content = RecordDict({"metrics": metrics})
    return Message(content=content, reply_to=msg)
282
+
283
if __name__ == "__main__":
    # Informational only: this module is launched through the Flower CLI,
    # not executed directly.
    print("Cliente pronto para ser executado com Flower 1.22.0")
    print("Use: flwr run . ou flower-supernode para deployment")
fleven/collector.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Coletor de métricas para análise de treinamento federado."""
2
+ import numpy as np
3
+
4
+
5
class MetricsCollector:
    """Collects and organizes federated training and evaluation metrics."""

    def __init__(self, strategy_name):
        # Strategy label used in file names and plot titles by the analysis module.
        self.strategy_name = strategy_name
        # One dict per round, e.g. {"round": 1, "global_train_loss": ..., "client_0_train_loss": ...}
        self.train_metrics_by_round = []
        self.eval_metrics_by_round = []

        # Cross-client dispersion metrics, filled in by the analysis step.
        self.convergence_metrics = {
            "rounds": [],
            "loss_variance": [],
            "loss_std": [],
            "max_min_diff": []
        }

    @property
    def active_client_ids(self):
        """Sorted unique ids of clients that actually reported a metric value.

        FIX: per-round records previously always contained the hard-coded
        keys for clients 0..2 (possibly with None values), so this property
        reported three clients regardless of participation.  Records now only
        hold keys for clients that reported, and None values are skipped.
        """
        ids = set()
        for round_data in self.train_metrics_by_round + self.eval_metrics_by_round:
            for key, value in round_data.items():
                if key.startswith("client_") and value is not None:
                    # Extract the number from 'client_X_...'.
                    try:
                        ids.add(int(key.split("_")[1]))
                    except (ValueError, IndexError):
                        # Ignore keys that do not follow the expected pattern.
                        continue
        return sorted(ids)

    def add_train_round(self, round_num, metrics):
        """Add the metrics of one training round.

        FIX: copies the global loss plus every per-client train loss present
        in ``metrics`` instead of hard-coding clients 0..2, so runs with any
        number of clients are recorded correctly.
        """
        round_data = {
            "round": round_num,
            "global_train_loss": metrics.get("global_train_loss", None),
        }
        for key, value in metrics.items():
            if key.startswith("client_") and key.endswith("_train_loss"):
                round_data[key] = value
        self.train_metrics_by_round.append(round_data)

    def add_eval_round(self, round_num, metrics):
        """Add the metrics of one evaluation round (see add_train_round)."""
        round_data = {
            "round": round_num,
            "global_eval_loss": metrics.get("global_eval_loss", None),
        }
        for key, value in metrics.items():
            if key.startswith("client_") and key.endswith("_eval_loss"):
                round_data[key] = value
        self.eval_metrics_by_round.append(round_data)

    def calculate_convergence_metrics(self, client_losses):
        """Return (variance, std, max-min gap) across clients; zeros when empty."""
        if len(client_losses) > 0:
            variance = np.var(client_losses)
            std_dev = np.std(client_losses)
            max_min_diff = max(client_losses) - min(client_losses)
            return variance, std_dev, max_min_diff
        return 0, 0, 0

    def _series(self, rows, key):
        # Ordered non-None values of ``key`` across the given round records.
        return [r[key] for r in rows if r.get(key) is not None]

    @property
    def train_metrics(self):
        """Training metrics reshaped into per-key series for plotting."""
        result = {
            "rounds": [r["round"] for r in self.train_metrics_by_round],
            "global_train_loss": self._series(self.train_metrics_by_round, "global_train_loss"),
        }
        # One series per active client, keyed exactly as the plots expect.
        for cid in self.active_client_ids:
            key = f"client_{cid}_train_loss"
            result[key] = self._series(self.train_metrics_by_round, key)
        return result

    @property
    def eval_metrics(self):
        """Evaluation metrics reshaped into per-key series for plotting."""
        result = {
            "rounds": [r["round"] for r in self.eval_metrics_by_round],
            "global_eval_loss": self._series(self.eval_metrics_by_round, "global_eval_loss"),
        }
        for cid in self.active_client_ids:
            key = f"client_{cid}_eval_loss"
            result[key] = self._series(self.eval_metrics_by_round, key)
        return result
fleven/mlflow_utils.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import mlflow
2
+ from pathlib import Path
3
+ from typing import Dict, Optional
4
+ import torch
5
+
6
+ """Utilitários para integração com MLflow."""
7
+
8
class MLflowTracker:
    """Fault-tolerant wrapper around the MLflow client.

    Every operation becomes a no-op when tracking is disabled, and any
    MLflow error is reported on stdout instead of propagating, so that
    experiment logging can never crash the federated run.
    """

    def __init__(self, tracking_uri: str, experiment_name: str, enabled: bool = True):
        """Connect to the MLflow server and select the experiment.

        Args:
            tracking_uri: URI of the MLflow tracking server.
            experiment_name: Name of the experiment to log under.
            enabled: When False, every method of this tracker is a no-op.
        """
        self.enabled = enabled
        if not self.enabled:
            print("[MLflow] Tracking desabilitado")
            return
        try:
            mlflow.set_tracking_uri(tracking_uri)
            mlflow.set_experiment(experiment_name)
            self.experiment = mlflow.get_experiment_by_name(experiment_name)
            print(f"[MLflow] Conectado ao experimento '{experiment_name}' em {tracking_uri}")
        except Exception as exc:
            # Connection failure degrades to disabled mode instead of raising.
            print(f"[MLflow] AVISO: Erro ao conectar: {exc}")
            self.enabled = False

    def start_run(self, run_name: str, tags: Optional[Dict] = None) -> Optional[mlflow.ActiveRun]:
        """Start a new MLflow run; return it, or None on failure/disabled."""
        if not self.enabled:
            return None
        try:
            active = mlflow.start_run(run_name=run_name, tags=tags)
        except Exception as exc:
            print(f"[MLflow] AVISO: Erro ao iniciar run: {exc}")
            return None
        print(f"[MLflow] Run iniciado: {run_name} (ID: {active.info.run_id})")
        return active

    def end_run(self):
        """Finish the currently active run."""
        if not self.enabled:
            return
        try:
            mlflow.end_run()
        except Exception as exc:
            print(f"[MLflow] AVISO: Erro ao finalizar run: {exc}")
            return
        print("[MLflow] Run finalizado")

    def log_params(self, params: Dict):
        """Log the experiment's parameters in a single batch."""
        if not self.enabled:
            return
        try:
            mlflow.log_params(params)
        except Exception as exc:
            print(f"[MLflow] AVISO: Erro ao logar parâmetros: {exc}")
            return
        print(f"[MLflow] {len(params)} parâmetros logados")

    def log_metric(self, key: str, value: float, step: Optional[int] = None):
        """Log a single metric value, optionally tied to a step (round)."""
        if not self.enabled:
            return
        try:
            mlflow.log_metric(key, value, step=step)
        except Exception as exc:
            print(f"[MLflow] AVISO: Erro ao logar métrica {key}: {exc}")

    def log_metrics(self, metrics: Dict, step: Optional[int] = None):
        """Log several metrics at once, optionally tied to a step."""
        if not self.enabled:
            return
        try:
            mlflow.log_metrics(metrics, step=step)
        except Exception as exc:
            print(f"[MLflow] AVISO: Erro ao logar métricas: {exc}")

    def log_artifact(self, local_path: str):
        """Upload one file as a run artifact."""
        if not self.enabled:
            return
        try:
            mlflow.log_artifact(local_path)
        except Exception as exc:
            print(f"[MLflow] AVISO: Erro ao logar artifact: {exc}")
            return
        print(f"[MLflow] Artifact logado: {local_path}")

    def log_artifacts(self, local_dir: str):
        """Upload a whole directory tree as run artifacts."""
        if not self.enabled:
            return
        try:
            mlflow.log_artifacts(local_dir)
        except Exception as exc:
            print(f"[MLflow] AVISO: Erro ao logar artifacts: {exc}")
            return
        print(f"[MLflow] Artifacts logados do diretório: {local_dir}")

    def log_model(self, model: torch.nn.Module, artifact_path: str = "model"):
        """Log a PyTorch model under *artifact_path*."""
        if not self.enabled:
            return
        try:
            mlflow.pytorch.log_model(model, artifact_path)
        except Exception as exc:
            print(f"[MLflow] AVISO: Erro ao logar modelo: {exc}")
            return
        print(f"[MLflow] Modelo PyTorch logado em '{artifact_path}'")

    def set_tag(self, key: str, value: str):
        """Attach a single tag to the active run."""
        if not self.enabled:
            return
        try:
            mlflow.set_tag(key, value)
        except Exception as exc:
            print(f"[MLflow] AVISO: Erro ao definir tag: {exc}")

    def set_tags(self, tags: Dict[str, str]):
        """Attach several tags to the active run."""
        if not self.enabled:
            return
        try:
            mlflow.set_tags(tags)
        except Exception as exc:
            print(f"[MLflow] AVISO: Erro ao definir tags: {exc}")
142
+
143
+
144
def get_mlflow_tracker(context) -> MLflowTracker:
    """Build an MLflowTracker from a Flower run configuration.

    Args:
        context: Flower Context whose ``run_config`` may define
            ``mlflow-enable``, ``mlflow-tracking-uri`` and
            ``mlflow-experiment-name``; sensible defaults are used
            for any missing key.

    Returns:
        A configured MLflowTracker instance.
    """
    cfg = context.run_config
    return MLflowTracker(
        tracking_uri=cfg.get("mlflow-tracking-uri", "http://127.0.0.1:5000"),
        experiment_name=cfg.get("mlflow-experiment-name", "FLEVEn-Experiments"),
        enabled=cfg.get("mlflow-enable", True),
    )
fleven/server.py ADDED
@@ -0,0 +1,358 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ServerApp para aprendizado federado com FLEVEn."""
2
+ import torch
3
+ from typing import Iterable, Optional
4
+ from pathlib import Path
5
+ from datetime import datetime
6
+
7
+ from flwr.app import Context, ArrayRecord, MetricRecord
8
+ from flwr.serverapp import ServerApp, Grid
9
+ from flwr.serverapp.strategy import FedAvg, FedAdam, FedYogi, FedAdagrad
10
+ from flwr.common import Message
11
+
12
+ from fleven.utils import set_seed, get_model
13
+ from fleven.collector import MetricsCollector
14
+ from fleven.analysis import create_visualizations, save_detailed_metrics, print_final_summary
15
+ from fleven.mlflow_utils import get_mlflow_tracker # import do fmlfow
16
+
17
+ STRATEGIES = {
18
+ "fedavg": FedAvg,
19
+ "fedadam": FedAdam,
20
+ "fedyogi": FedYogi,
21
+ "fedadagrad": FedAdagrad,
22
+ }
23
+
24
+
25
def get_custom_strategy_class(base_strategy_class):
    """Dynamically build a metrics-collecting subclass of *base_strategy_class*.

    The returned class wraps ``aggregate_train``/``aggregate_evaluate`` to
    record the global and per-client losses of every round in a
    MetricsCollector, mirroring them to MLflow when a tracker is supplied.
    """

    class CustomStrategy(base_strategy_class):
        def __init__(self, collector: MetricsCollector, mlflow_tracker=None, **kwargs):
            super().__init__(**kwargs)
            self.collector = collector
            self.mlflow_tracker = mlflow_tracker  # Optional MLflowTracker
            strategy_name = self.__class__.__bases__[0].__name__
            print(f"CustomStrategy (coletando métricas para {strategy_name}) inicializada.")

        def aggregate_train(self, server_round: int, replies: Iterable[Message]) -> tuple[Optional[ArrayRecord], Optional[MetricRecord]]:
            # BUGFIX: materialize the replies first. The incoming Iterable may
            # be a one-shot iterator that super() consumes during aggregation,
            # which would leave the per-client loop below with nothing to read.
            replies = list(replies)
            aggregated_arrays, aggregated_metrics = super().aggregate_train(server_round, replies)

            if aggregated_metrics:
                individual_losses = {}
                for reply in replies:
                    if reply.has_content() and "metrics" in reply.content:
                        metrics = reply.content["metrics"]
                        client_id = int(metrics.get("client_id", 0))
                        train_loss = float(metrics.get("train_loss", 0.0))
                        print(f" > Detalhe Cliente {client_id}: Perda de Treino = {train_loss:.6f}")
                        individual_losses[f"client_{client_id}_train_loss"] = train_loss

                        # Mirror per-client loss to MLflow, keyed by round.
                        if self.mlflow_tracker:
                            self.mlflow_tracker.log_metric(
                                f"client_{client_id}/train_loss",
                                train_loss,
                                step=server_round,
                            )

                global_loss = aggregated_metrics.get("train_loss")
                metrics_dict = {"global_train_loss": global_loss}
                metrics_dict.update(individual_losses)
                self.collector.add_train_round(server_round, metrics_dict)

                # Mirror the aggregated (global) loss to MLflow.
                if self.mlflow_tracker and global_loss is not None:
                    self.mlflow_tracker.log_metric(
                        "global/train_loss",
                        global_loss,
                        step=server_round,
                    )

            return aggregated_arrays, aggregated_metrics

        def aggregate_evaluate(self, server_round: int, replies: Iterable[Message]) -> Optional[MetricRecord]:
            # Same materialization fix as aggregate_train (see above).
            replies = list(replies)
            aggregated_metrics = super().aggregate_evaluate(server_round, replies)

            if aggregated_metrics:
                individual_losses = {}
                for reply in replies:
                    if reply.has_content() and "metrics" in reply.content:
                        metrics = reply.content["metrics"]
                        client_id = int(metrics.get("client_id", 0))
                        eval_loss = float(metrics.get("eval_loss", 0.0))
                        print(f" > Detalhe Cliente {client_id}: Perda de Avaliação = {eval_loss:.6f}")
                        individual_losses[f"client_{client_id}_eval_loss"] = eval_loss

                        if self.mlflow_tracker:
                            self.mlflow_tracker.log_metric(
                                f"client_{client_id}/eval_loss",
                                eval_loss,
                                step=server_round,
                            )

                global_loss = aggregated_metrics.get("eval_loss")
                metrics_dict = {"global_eval_loss": global_loss}
                metrics_dict.update(individual_losses)
                self.collector.add_eval_round(server_round, metrics_dict)

                if self.mlflow_tracker and global_loss is not None:
                    self.mlflow_tracker.log_metric(
                        "global/eval_loss",
                        global_loss,
                        step=server_round,
                    )

            return aggregated_metrics

    return CustomStrategy
109
+
110
+
111
+ # Cria a aplicação servidor
112
+ app = ServerApp()
113
+
114
@app.main()
def main(grid: Grid, context: Context) -> None:
    """Server entry point - reads every setting from the Flower Context."""

    mlflow_tracker = get_mlflow_tracker(context)

    seed = int(context.run_config.get("seed", 42))
    set_seed(seed)

    # General run settings
    strategy_name = context.run_config.get("strategy", "fedavg").lower()
    num_rounds = int(context.run_config.get("rounds", 5))
    min_nodes = int(context.run_config.get("min-nodes", 3))


    # Model settings
    model_type = context.run_config.get("model-type", "lstm")
    input_size = int(context.run_config.get("input-size", 6))
    prediction_length = int(context.run_config.get("prediction-length", 10))
    num_layers = int(context.run_config.get("num-layers", 1))
    sequence_length = int(context.run_config.get("sequence-length", 60))
    target_column = str(context.run_config.get("target-column", "P_kW"))

    # Parameters for the "lstm" and "mlp" models
    hidden_size = int(context.run_config.get("hidden-size", 32))

    # Parameters for "lstm_dense" (the adapted model)
    lstm_hidden_size = int(context.run_config.get("lstm-hidden-size", 32))
    dense_hidden_size = int(context.run_config.get("dense-hidden-size", 16))

    # Dropout parameter for "lstm" and "lstm_dense"
    dropout = float(context.run_config.get("dropout", 0.0))

    # Training settings (batch_size/learning_rate/local_epochs are logged to
    # MLflow here; the clients read their own copies from the run config)
    batch_size = int(context.run_config.get("batch-size", 32))
    learning_rate = float(context.run_config.get("learning-rate", 1e-5))
    local_epochs = int(context.run_config.get("local-epochs", 1))
    train_test_split = float(context.run_config.get("train-test-split", 0.8))

    # Output directory: explicit config path, or <repo>/results by default
    results_base_path = context.run_config.get("results-base-path", None)
    if results_base_path:
        output_dir = Path(results_base_path)
    else:
        base_dir = Path(__file__).parent.parent
        output_dir = base_dir / "results"

    output_dir.mkdir(parents=True, exist_ok=True)

    # Start the MLflow run with a timestamped, strategy/model-prefixed name
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_name = f"{strategy_name}_{model_type}_{timestamp}"

    mlflow_tracker.start_run(
        run_name=run_name,
        tags={
            "strategy": strategy_name,
            "model_type": model_type,
            "target": target_column
        }
    )

    # Log every hyperparameter of this run to MLflow
    mlflow_tracker.log_params({
        "strategy": strategy_name,
        "num_rounds": num_rounds,
        "min_nodes": min_nodes,
        "model_type": model_type,
        "input_size": input_size,
        "prediction_length": prediction_length,
        "num_layers": num_layers,
        "sequence_length": sequence_length,
        "target_column": target_column,
        "batch_size": batch_size,
        "learning_rate": learning_rate,
        "local_epochs": local_epochs,
        "train_test_split": train_test_split,
        "seed": seed,

        # Model-architecture parameters
        "hidden_size": hidden_size,
        "lstm_hidden_size": lstm_hidden_size,
        "dense_hidden_size": dense_hidden_size,
        "dropout": dropout
    })

    print(f"\n{'='*60}")
    print(f"SERVIDOR DE APRENDIZADO FEDERADO")
    print(f"{'='*60}")
    print(f"Estratégia: {strategy_name.upper()}")
    print(f"Rodadas: {num_rounds}")
    print(f"Nós mínimos: {min_nodes}")
    print(f"Modelo: {model_type.upper()}")
    print(f"Tamanho da Previsão: {prediction_length}")
    print(f"Tamanho Hidden (lstm/mlp): {hidden_size}")
    print(f"Tamanho LSTM Hidden (lstm_dense): {lstm_hidden_size}")
    print(f"Tamanho Dense Hidden (lstm_dense): {dense_hidden_size}")
    print(f"Número de Camadas do Modelo: {num_layers}")
    print(f"Dropout: {dropout}")
    print(f"Target Column: {target_column}")
    print(f"Resultados serão salvos em: {output_dir.absolute()}")
    print(f"{'='*60}\n")

    # Per-round metrics collector shared with the custom strategy
    collector = MetricsCollector(strategy_name)

    # Model configuration dict consumed by the get_model factory
    model_config = {
        "name": model_type,
        "input_size": input_size,
        "output_size": prediction_length,
        "num_layers": num_layers,
        "sequence_length": sequence_length,

        # Parameters for "lstm" and "mlp"
        "hidden_size": hidden_size,

        # Parameters for "lstm_dense"
        "lstm_hidden_size": lstm_hidden_size,
        "dense_hidden_size": dense_hidden_size,

        # Dropout parameter
        "dropout": dropout
    }

    # Initial global model whose weights seed the first round
    net = get_model(model_config)
    initial_arrays = ArrayRecord(net.state_dict())

    # Base parameters common to every strategy
    strategy_params = {
        "fraction_train": 1.0,
        "fraction_evaluate": 1.0,
        "min_available_nodes": min_nodes,
        "min_train_nodes": min_nodes,
        "min_evaluate_nodes": min_nodes,
    }

    # Strategy-specific parameters
    # NOTE(review): assumes run_config supports a nested "strategy-params"
    # mapping — Flower run configs are typically flat; confirm this key is
    # actually populated at runtime.
    strategy_specific_params = context.run_config.get("strategy-params", {})

    if strategy_name == "fedadam":
        strategy_params["eta"] = float(strategy_specific_params.get("eta", 0.01))
        strategy_params["beta_1"] = float(strategy_specific_params.get("beta_1", 0.9))
        strategy_params["beta_2"] = float(strategy_specific_params.get("beta_2", 0.999))
        print(f"Carregando FedAdam com: eta={strategy_params['eta']}, beta_1={strategy_params['beta_1']}, beta_2={strategy_params['beta_2']}")

        # Log the strategy-specific parameters too
        mlflow_tracker.log_params({
            "eta": strategy_params["eta"],
            "beta_1": strategy_params["beta_1"],
            "beta_2": strategy_params["beta_2"]
        })

    elif strategy_name == "fedadagrad":
        strategy_params["eta"] = float(strategy_specific_params.get("eta_adagrad", 0.1))
        strategy_params["initial_accumulator_value"] = float(strategy_specific_params.get("initial_accumulator_value", 0.1))
        print(f"Carregando FedAdagrad com: eta={strategy_params['eta']}, initial_accumulator_value={strategy_params['initial_accumulator_value']}")

        mlflow_tracker.log_params({
            "eta": strategy_params["eta"],
            "initial_accumulator_value": strategy_params["initial_accumulator_value"]
        })

    elif strategy_name == "fedyogi":
        strategy_params["eta"] = float(strategy_specific_params.get("eta_yogi", 0.01))
        strategy_params["beta_1"] = float(strategy_specific_params.get("beta_1_yogi", 0.9))
        strategy_params["beta_2"] = float(strategy_specific_params.get("beta_2_yogi", 0.999))
        strategy_params["initial_accumulator_value"] = float(strategy_specific_params.get("initial_accumulator_value_yogi", 1e-6))
        print(f"Carregando FedYogi com: eta={strategy_params['eta']}, beta_1={strategy_params['beta_1']}, beta_2={strategy_params['beta_2']}")

        mlflow_tracker.log_params({
            "eta": strategy_params["eta"],
            "beta_1": strategy_params["beta_1"],
            "beta_2": strategy_params["beta_2"],
            "initial_accumulator_value": strategy_params["initial_accumulator_value"]
        })

    # Instantiate the metrics-collecting wrapper around the chosen strategy
    # (unknown names silently fall back to FedAvg)
    BaseStrategyClass = STRATEGIES.get(strategy_name, FedAvg)
    CustomStrategyClass = get_custom_strategy_class(BaseStrategyClass)
    strategy = CustomStrategyClass(
        collector=collector,
        mlflow_tracker=mlflow_tracker,  # forwarded so the strategy logs per round
        **strategy_params
    )

    print("Iniciando servidor FL...")

    # Run the federated training loop
    result = strategy.start(
        grid=grid,
        initial_arrays=initial_arrays,
        num_rounds=num_rounds,
    )

    # Report, persist and log the final model (if aggregation produced one)
    print("\n" + "="*60)
    if result.arrays:
        print(f"Modelo final obtido com sucesso!")
        total_params = sum(p.numel() for p in result.arrays.to_torch_state_dict().values())
        print(f"Total de parâmetros: {total_params}")

        # Log the final model to MLflow
        final_model = get_model(model_config)
        final_model.load_state_dict(result.arrays.to_torch_state_dict())
        mlflow_tracker.log_model(final_model, "final_model")

        # Also save the weights locally and attach them as a run artifact
        model_path = output_dir / "final_model.pt"
        torch.save(final_model.state_dict(), model_path)
        mlflow_tracker.log_artifact(str(model_path))

    print(f"Resultados salvos em: {output_dir.absolute()}")
    print("="*60)

    # Post-run analyses and visualizations (best-effort: a plotting failure
    # must not invalidate a completed training run)
    print("\nTREINAMENTO CONCLUÍDO - GERANDO ANÁLISES")
    print("="*60)

    try:
        create_visualizations(collector, output_dir)
        save_detailed_metrics(collector, output_dir)
        print_final_summary(collector)

        # Attach the generated plots/CSVs to the MLflow run
        mlflow_tracker.log_artifacts(str(output_dir))

    except Exception as e:
        print(f"AVISO: Erro ao gerar análises: {e}")
        print("O treinamento foi concluído com sucesso, mas as visualizações não foram geradas.")

    # Close the MLflow run
    mlflow_tracker.end_run()

    print("\n" + "="*60)
    print("PROCESSAMENTO FINALIZADO")
    print("="*60)
354
+
355
+
356
if __name__ == "__main__":
    # Informational only: the ServerApp is launched through the Flower CLI,
    # not by executing this module directly.
    print("Servidor pronto para ser executado com Flower 1.22.0")
    print("Use: flwr run .")
fleven/utils.py ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import random
4
+ import torch
5
+ import torch.nn as nn
6
+ import torch.nn.functional as F
7
+ from torch.utils.data import TensorDataset, DataLoader
8
+ from sklearn.preprocessing import MinMaxScaler
9
+ from pathlib import Path
10
+
11
def set_seed(seed: int):
    """Seed every RNG used by the project (random, numpy, torch, CUDA)."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
18
+
19
+ # LSTM
20
class LSTMNet(nn.Module):
    """LSTM followed by a single linear head on the last time step."""

    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout=0.0):
        super().__init__()
        # nn.LSTM only applies dropout BETWEEN stacked layers, so it is
        # meaningless (and warns) when there is a single layer.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map (batch, seq, features) to (batch, output_size) predictions."""
        sequence_states, _ = self.lstm(x)
        # Only the final time step feeds the linear head.
        return self.linear(sequence_states[:, -1, :])
38
+
39
+ # LSTM -> Dropout -> Dense(ReLU) -> Dense(Output)
40
class LSTMDenseNet(nn.Module):
    """LSTM -> Dropout -> Dense(ReLU) -> Dense output head."""

    def __init__(self, input_size, lstm_hidden_size, dense_hidden_size, output_size, num_layers=1, dropout=0.0):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size,
            lstm_hidden_size,
            num_layers,
            batch_first=True,
            # Inter-layer dropout is only defined for stacked LSTMs.
            dropout=dropout if num_layers > 1 else 0.0,
        )
        # Dropout applied to the LSTM's final-step output.
        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(lstm_hidden_size, dense_hidden_size)
        self.fc2 = nn.Linear(dense_hidden_size, output_size)

    def forward(self, x):
        """Map (batch, seq, features) to (batch, output_size)."""
        sequence_states, _ = self.lstm(x)        # (batch, seq, lstm_hidden)
        last_state = sequence_states[:, -1, :]   # keep only the final step
        hidden = F.relu(self.fc1(self.dropout(last_state)))
        return self.fc2(hidden)
71
+
72
+ # MLP
73
class MLPNet(nn.Module):
    """Two-layer perceptron over a flattened input sequence."""

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # input_size is expected to be sequence_length * num_features,
        # since forward() flattens each input sequence.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Flatten (batch, seq, features) and run it through the MLP."""
        flat = x.reshape(x.shape[0], -1)
        return self.fc2(self.relu(self.fc1(flat)))
90
+
91
def create_sliding_windows(data, sequence_length, prediction_length):
    """Slice *data* into (input window, future target) pairs.

    The target is taken from the LAST column of *data* for the
    ``prediction_length`` steps that follow each input window.
    """
    num_windows = len(data) - sequence_length - prediction_length + 1
    xs = [data[start:start + sequence_length] for start in range(num_windows)]
    ys = [
        data[start + sequence_length:start + sequence_length + prediction_length, -1]
        for start in range(num_windows)
    ]
    return np.array(xs), np.array(ys)
100
+
101
def load_data(client_id: int, sequence_length: int, prediction_length: int,
              batch_size: int, train_test_split: float, data_base_path: str = None,
              target_column: str = "P_kW"):
    """
    Load, preprocess and window one client's CSV data into DataLoaders.

    Args:
        client_id: Client identifier (selects the ``client_<id>`` folder)
        sequence_length: Input window size
        prediction_length: Number of steps ahead to predict
        batch_size: Batch size for both loaders
        train_test_split: Fraction of rows used for training (e.g. 0.8 = 80%)
        data_base_path: Optional base path for the data folders
        target_column: Name of the column to be predicted

    Returns:
        (trainloader, testloader, num_features)

    Raises:
        FileNotFoundError: If the client's folder or its CSVs are missing.
        ValueError: If the target column is invalid or a split ends up empty.
    """
    # Resolve the data directory: explicit base path, or relative to this file
    if data_base_path:
        # Use the configured path
        data_dir = Path(data_base_path) / f"client_{client_id}"
        print(f"[Cliente {client_id}] Usando data_base_path configurado: {data_dir}")
    else:
        # Fall back to <repo>/data/client_<id>
        base_dir = Path(__file__).parent.parent
        data_dir = base_dir / "data" / f"client_{client_id}"
        print(f"[Cliente {client_id}] Usando caminho relativo: {data_dir}")

    print(f"[Cliente {client_id}] Procurando dados em: {data_dir.absolute()}")

    # Fail fast with a clear message if the directory is missing
    if not data_dir.exists():
        raise FileNotFoundError(
            f"Diretório não encontrado para o cliente {client_id}: {data_dir.absolute()}"
        )

    # Load every CSV file in the client's directory
    csv_files = list(data_dir.glob("*.csv"))

    if not csv_files:
        raise FileNotFoundError(
            f"Nenhum arquivo CSV encontrado para o cliente {client_id} no diretório {data_dir.absolute()}"
        )

    print(f"[Cliente {client_id}] Encontrados {len(csv_files)} arquivos CSV")
    all_routes_df = [pd.read_csv(f) for f in csv_files]
    combined_df = pd.concat(all_routes_df, ignore_index=True)

    # Columns expected in every route CSV
    # NOTE(review): assumes each CSV actually contains these three columns —
    # pd.concat/indexing will raise KeyError otherwise; confirm against the data.
    all_columns = ['vehicle_speed', 'engine_rpm', 'P_kW']

    # Validate the requested target column
    if target_column not in all_columns:
        raise ValueError(
            f"A coluna alvo '{target_column}' não é uma das colunas válidas: {all_columns}"
        )

    # Reorder columns so the target column is LAST (create_sliding_windows
    # reads targets from the last column)
    feature_columns = [col for col in all_columns if col != target_column] + [target_column]
    processed_df = combined_df[feature_columns].dropna()

    # Chronological split: first part trains, remainder tests
    split_index = int(len(processed_df) * train_test_split)
    train_df = processed_df.iloc[:split_index]
    test_df = processed_df.iloc[split_index:]

    # Fit the scaler on training data only to avoid test-set leakage
    scaler = MinMaxScaler()
    scaler.fit(train_df)

    train_scaled = scaler.transform(train_df)
    test_scaled = scaler.transform(test_df)

    X_train, y_train = create_sliding_windows(train_scaled, sequence_length, prediction_length)
    X_test, y_test = create_sliding_windows(test_scaled, sequence_length, prediction_length)

    # Guard against splits too short for even a single window
    if len(X_train) == 0 or len(X_test) == 0:
        raise ValueError(
            f"A divisão de dados para o cliente {client_id} resultou em um conjunto vazio."
        )

    X_train_tensor = torch.from_numpy(X_train).float()
    y_train_tensor = torch.from_numpy(y_train).float()
    X_test_tensor = torch.from_numpy(X_test).float()
    y_test_tensor = torch.from_numpy(y_test).float()

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

    # Shuffle only the training loader; evaluation stays in order
    trainloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    testloader = DataLoader(test_dataset, batch_size=batch_size)

    num_features = X_train_tensor.shape[2]

    print(f"[Cliente {client_id}] Dados carregados: {len(train_dataset)} treino, {len(test_dataset)} teste")

    return trainloader, testloader, num_features
193
+
194
def train(net, trainloader, epochs: int, learning_rate: float,
          max_grad_norm: float, device):
    """Run local training and return the mean MSE loss per sample."""
    criterion = torch.nn.MSELoss(reduction="mean")
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
    net.to(device)
    net.train()

    running_loss = 0.0
    seen = 0

    for _ in range(epochs):
        for batch_x, batch_y in trainloader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            optimizer.zero_grad()
            loss = criterion(net(batch_x), batch_y)
            loss.backward()
            # Clip gradients to stabilise local updates before stepping.
            torch.nn.utils.clip_grad_norm_(net.parameters(), max_norm=max_grad_norm)
            optimizer.step()

            count = batch_x.size(0)
            running_loss += loss.item() * count
            seen += count

    # Weighted by batch size so the result is a true per-sample average.
    return running_loss / seen if seen else 0.0
222
+
223
+
224
+ def test(net, testloader, device):
225
+ """Avalia e retorna (avg_loss_per_sample, num_examples)."""
226
+ criterion = torch.nn.MSELoss(reduction="mean")
227
+ net.to(device)
228
+ net.eval()
229
+ total_loss_sum = 0.0
230
+ total_samples = 0
231
+ with torch.no_grad():
232
+ for sequences, labels in testloader:
233
+ sequences, labels = sequences.to(device), labels.to(device)
234
+ outputs = net(sequences)
235
+ loss = criterion(outputs, labels)
236
+ batch_size = sequences.size(0)
237
+ total_loss_sum += loss.item() * batch_size
238
+ total_samples += batch_size
239
+
240
+ if total_samples == 0:
241
+ return 0.0, 0
242
+ avg_loss = total_loss_sum / total_samples
243
+ return avg_loss, total_samples
244
+
245
def get_model(model_config: dict):
    """Model factory: build the network described by *model_config*.

    ``model_config["name"]`` selects among "lstm", "lstm_dense" and "mlp";
    the remaining keys supply the constructor arguments.

    Raises:
        ValueError: If the model name is not one of the supported types.
    """
    model_type = model_config.get("name", "lstm").lower()
    shared_layers = model_config.get("num_layers", 1)
    shared_dropout = model_config.get("dropout", 0.0)

    if model_type == "lstm":
        print(f"Criando modelo LSTMNet (Simples: LSTM -> Linear)...")
        # The project's original model, now with dropout support.
        return LSTMNet(
            input_size=model_config["input_size"],
            hidden_size=model_config["hidden_size"],
            output_size=model_config["output_size"],
            num_layers=shared_layers,
            dropout=shared_dropout,
        )

    if model_type == "lstm_dense":
        print(f"Criando modelo LSTMDenseNet (Adaptado: LSTM -> Dense -> Linear)...")
        # Model adapted from one of the DACAI notebooks.
        return LSTMDenseNet(
            input_size=model_config["input_size"],
            lstm_hidden_size=model_config["lstm_hidden_size"],
            dense_hidden_size=model_config["dense_hidden_size"],
            output_size=model_config["output_size"],
            num_layers=shared_layers,
            dropout=shared_dropout,
        )

    if model_type == "mlp":
        print(f"Criando modelo MLPNet...")
        # The MLP consumes the whole input window flattened into one vector.
        flattened_input = model_config["sequence_length"] * model_config["input_size"]
        return MLPNet(
            input_size=flattened_input,
            hidden_size=model_config["hidden_size"],
            output_size=model_config["output_size"],
        )

    raise ValueError(f"Tipo de modelo desconhecido: {model_type}")
images/mlflow_print.png ADDED
pyproject.toml ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "fleven"
7
+ version = "1.0.0"
8
+ description = "FLEVEn — Federated Learning for Vehicular Environment"
9
+
10
+ readme = "README.md"
11
+ keywords = ["federated-learning", "flower", "mlflow", "federated-learning-vehicular", "time-series", "lstm", "pytorch", "obd"]
12
+ classifiers = [
13
+ "Programming Language :: Python :: 3.9",
14
+ "Programming Language :: Python :: 3.10",
15
+ "Programming Language :: Python :: 3.11",
16
+ "License :: OSI Approved :: Apache Software License",
17
+ "Operating System :: OS Independent",
18
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
19
+ "Topic :: Software Development :: Libraries :: Python Modules"
20
+ ]
21
+ authors = [
22
+ {name = "João C. Braz", email = "calbraz@gmail.com"},
23
+ {name = "José Wilson C. Souza", email = "josewilson@matematica.ufrj.br"},
24
+ {name = "Erick de Souza Lima", email = "erickcefetbcc@gmail.com"},
25
+ {name = "Mina", email = "minammonteiro4@gmail.com"},
26
+ ]
27
+
28
+ maintainers = [
29
+ {name = "José Wilson C. Souza", email = "josewilson@matematica.ufrj.br"}
30
+ ]
31
+
32
+ license = {text = "Apache-2.0"}
33
+ dependencies = [
34
+ "flwr[simulation]>=1.22.0,<2.0",
35
+ "torch>=2.0.0",
36
+ "pandas>=2.0.0",
37
+ "numpy>=1.24.0",
38
+ "scikit-learn>=1.3.0",
39
+ "matplotlib>=3.7.0",
40
+ "seaborn>=0.12.0",
41
+ "toml",
42
+ "mlflow>=2.9.0"
43
+ ]
44
+
45
+ [project.urls]
46
+ Repository = "https://github.com/josewilsonsouza/fleven"
47
+ "Issue Tracker" = "https://github.com/josewilsonsouza/fleven/issues"
48
+
49
+ [tool.hatch.build.targets.wheel]
50
+ packages = ["fleven"]
51
+
52
+ [tool.flwr.app]
53
+ publisher = "Lainf_Dmtic_Inmetro"
54
+
55
+ [tool.flwr.app.components]
56
+ serverapp = "fleven.server:app"
57
+ clientapp = "fleven.client:app"
58
+
59
+ [tool.flwr.app.config]
60
+ # Configurações de Federação
61
+ strategy = "fedavg" # Opções: "fedavg", "fedadam", "fedadagrad", "fedyogi"
62
+ rounds = 10
63
+ min-nodes = 3
64
+ seed = 42
65
+
66
+ # 🔧 Caminhos (ajustar conforme necessário; deixar "" usa os caminhos padrão do projeto)
67
+ data-base-path = "" #"C:/Users/abece/Documents/fleven-test/data"
68
+ metrics-base-path = "" #"C:/Users/abece/Documents/fleven-test/metrics"
69
+ results-base-path = "" #"C:/Users/abece/Documents/fleven-test/results"
70
+
71
+ # Configurações MLflow
72
+ mlflow-tracking-uri = "http://127.0.0.1:5000" # URI do servidor MLflow
73
+ mlflow-experiment-name = "FLEVEn-Experiments"
74
+ mlflow-enable = true # Habilitar/desabilitar MLflow
75
+
76
+
77
+ # Escolha dos modelos do FLEVEn: "lstm", "lstm_dense", "mlp"
78
+ model-type = "lstm_dense"
79
+
80
+ # Parâmetros para "lstm" e "mlp"
81
+ hidden-size = 32
82
+
83
+ # Parâmetros para "lstm_dense" (o novo modelo adaptado)
84
+ lstm-hidden-size = 32 # Tamanho da camada LSTM
85
+ dense-hidden-size = 16 # Tamanho da camada Densa intermediária
86
+
87
+ # Parâmetros para "lstm" e "lstm_dense"
88
+ input-size = 3
89
+ num-layers = 1
90
+ dropout = 0.2 # Taxa de dropout (0.0 para desativar)
91
+
92
+ # Configurações de Séries Temporais
93
+ sequence-length = 100
94
+ prediction-length = 50
95
+ target-column = "vehicle_speed"
96
+
97
+ # Configurações de Treinamento
98
+ batch-size = 32
99
+ learning-rate = 1e-5
100
+ local-epochs = 1
101
+ max-grad-norm = 1.0
102
+
103
+ # Configurações de Dados
104
+ train-test-split = 0.8
105
+
106
+ # Configurações de Checkpoint
107
+ save-checkpoint-every = 5
108
+
109
+ [tool.flwr.app.config.strategy-params]
110
+ # Parâmetros para FedAdam
111
+ eta = 0.01
112
+ beta_1 = 0.9
113
+ beta_2 = 0.999
114
+
115
+ # Parâmetros para FedAdagrad
116
+ eta_adagrad = 0.1
117
+ initial_accumulator_value = 0.1
118
+
119
+ # Parâmetros para FedYogi
120
+ eta_yogi = 0.01
121
+ beta_1_yogi = 0.9
122
+ beta_2_yogi = 0.999
123
+ initial_accumulator_value_yogi = 1e-6
124
+
125
+ [tool.flwr.federations]
126
+ default = "local-simulation"
127
+
128
+ [tool.flwr.federations.local-simulation]
129
+ options.num-supernodes = 3
130
+ options.backend.client-resources.num-cpus = 2
131
+ options.backend.client-resources.num-gpus = 0.0
132
+
133
+ # essas configs vão ser importantes mais na frente, quando quisermos,
134
+ # por exemplo, definir outros parâmetros para certos clientes
135
+ #[[tool.flwr.federations.local-simulation.options.supernode.resources]]
136
+ #node-config.partition-id = 1
137
+ #node-config.num-partitions = 3
138
+
139
+ #[[tool.flwr.federations.local-simulation.options.supernode.resources]]
140
+ #node-config.partition-id = 2
141
+ #node-config.num-partitions = 3
142
+
143
+ #[[tool.flwr.federations.local-simulation.options.supernode.resources]]
144
+ #node-config.partition-id = 3
145
+ #node-config.num-partitions = 3
146
+
147
+ [tool.flwr.federations.fleven-deployment]
148
+ address = "127.0.0.1:9093"
149
+ insecure = true