Upload app.py
Browse files
app.py
CHANGED
|
@@ -367,17 +367,53 @@ def build_entity_to_entity_graph(entities_by_doc: List[List[Tuple[str, str]]]) -
|
|
| 367 |
"target": entity_to_id[ent2],
|
| 368 |
"weight": weight,
|
| 369 |
"source_entity": ent1[0],
|
| 370 |
-
"target_entity": ent2[0]
|
|
|
|
| 371 |
})
|
| 372 |
|
| 373 |
# Ordenar arestas por peso
|
| 374 |
entity_edges.sort(key=lambda x: x["weight"], reverse=True)
|
| 375 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 376 |
return {
|
| 377 |
"nodes": nodes,
|
| 378 |
"edges": entity_edges[:200], # Limitar a 200 arestas mais fortes
|
| 379 |
"node_count": len(nodes),
|
| 380 |
-
"edge_count": len(entity_edges)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 381 |
}
|
| 382 |
|
| 383 |
|
|
|
|
| 367 |
"target": entity_to_id[ent2],
|
| 368 |
"weight": weight,
|
| 369 |
"source_entity": ent1[0],
|
| 370 |
+
"target_entity": ent2[0],
|
| 371 |
+
"reason": f"Aparecem juntos em {weight} documento(s)"
|
| 372 |
})
|
| 373 |
|
| 374 |
# Ordenar arestas por peso
|
| 375 |
entity_edges.sort(key=lambda x: x["weight"], reverse=True)
|
| 376 |
|
| 377 |
+
# Calcular métricas de grafo
|
| 378 |
+
# Degree centrality (número de conexões de cada nó)
|
| 379 |
+
degree = defaultdict(int)
|
| 380 |
+
for edge in entity_edges:
|
| 381 |
+
degree[edge["source"]] += edge["weight"]
|
| 382 |
+
degree[edge["target"]] += edge["weight"]
|
| 383 |
+
|
| 384 |
+
# Calcular max degree para normalização
|
| 385 |
+
max_degree = max(degree.values()) if degree else 1
|
| 386 |
+
|
| 387 |
+
# Atualizar nós com métricas
|
| 388 |
+
hubs = []
|
| 389 |
+
for node in nodes:
|
| 390 |
+
node_degree = degree.get(node["id"], 0)
|
| 391 |
+
node["degree"] = node_degree
|
| 392 |
+
node["centrality"] = round(node_degree / max_degree, 3)
|
| 393 |
+
|
| 394 |
+
# Classificar nó
|
| 395 |
+
if node["centrality"] > 0.7:
|
| 396 |
+
node["role"] = "hub" # Hub central
|
| 397 |
+
hubs.append(node)
|
| 398 |
+
elif node["centrality"] > 0.3:
|
| 399 |
+
node["role"] = "connector" # Conector
|
| 400 |
+
else:
|
| 401 |
+
node["role"] = "peripheral" # Periférico
|
| 402 |
+
|
| 403 |
+
# Top hubs
|
| 404 |
+
hubs.sort(key=lambda x: x["degree"], reverse=True)
|
| 405 |
+
|
| 406 |
return {
|
| 407 |
"nodes": nodes,
|
| 408 |
"edges": entity_edges[:200], # Limitar a 200 arestas mais fortes
|
| 409 |
"node_count": len(nodes),
|
| 410 |
+
"edge_count": len(entity_edges),
|
| 411 |
+
"hubs": [{"entity": h["entity"], "type": h["type"], "degree": h["degree"]} for h in hubs[:5]],
|
| 412 |
+
"insights": {
|
| 413 |
+
"total_connections": sum(degree.values()) // 2,
|
| 414 |
+
"avg_degree": round(sum(degree.values()) / len(degree), 1) if degree else 0,
|
| 415 |
+
"hub_count": len(hubs)
|
| 416 |
+
}
|
| 417 |
}
|
| 418 |
|
| 419 |
|