GraphGen / graphgen /models /evaluator /kg /structure_evaluator.py
github-actions[bot]
Auto-sync from demo at Tue Dec 30 02:55:31 UTC 2025
734ba45
from collections import Counter
from typing import Any, Dict, Optional
import numpy as np
from scipy import stats
from graphgen.bases import BaseGraphStorage
from graphgen.utils import logger
class StructureEvaluator:
"""Evaluates structural robustness of the graph."""
def __init__(
self,
graph_storage: BaseGraphStorage,
noise_ratio_threshold: float = 0.15,
largest_cc_ratio_threshold: float = 0.90,
avg_degree_min: float = 2.0,
avg_degree_max: float = 5.0,
powerlaw_r2_threshold: float = 0.75,
):
self.graph_storage = graph_storage
self.noise_ratio_threshold = noise_ratio_threshold
self.largest_cc_ratio_threshold = largest_cc_ratio_threshold
self.avg_degree_min = avg_degree_min
self.avg_degree_max = avg_degree_max
self.powerlaw_r2_threshold = powerlaw_r2_threshold
def evaluate(self) -> Dict[str, Any]:
"""
Evaluate the structural robustness of the graph.
:return:
"""
storage = self.graph_storage
total_nodes = storage.get_node_count()
if total_nodes == 0:
return {"error": "Empty graph"}
total_edges = storage.get_edge_count()
degree_map = storage.get_all_node_degrees()
# Noise ratio: isolated nodes / total nodes
isolated_nodes = [nid for nid, deg in degree_map.items() if deg == 0]
noise_ratio = len(isolated_nodes) / total_nodes
# Largest connected component
components = storage.get_connected_components(undirected=True)
largest_cc_ratio = (
len(max(components, key=len)) / total_nodes if components else 0
)
avg_degree = sum(degree_map.values()) / total_nodes
powerlaw_r2 = self._calculate_powerlaw_r2(degree_map)
results = {
"total_nodes": total_nodes,
"total_edges": total_edges,
"noise_ratio": noise_ratio,
"largest_cc_ratio": largest_cc_ratio,
"avg_degree": avg_degree,
"powerlaw_r2": powerlaw_r2,
"is_robust": (
noise_ratio < self.noise_ratio_threshold
and largest_cc_ratio > self.largest_cc_ratio_threshold
and self.avg_degree_min <= avg_degree <= self.avg_degree_max
and (
powerlaw_r2 is not None and powerlaw_r2 > self.powerlaw_r2_threshold
)
),
}
return results
@staticmethod
def _calculate_powerlaw_r2(degree_map: Dict[str, int]) -> Optional[float]:
degrees = [deg for deg in degree_map.values() if deg > 0]
if len(degrees) < 10:
logger.warning("Insufficient nodes for power law fitting")
return None
try:
degree_counts = Counter(degrees)
degree_values, frequencies = zip(*sorted(degree_counts.items()))
if len(degree_values) < 3:
logger.warning(
f"Insufficient unique degrees ({len(degree_values)}) for power law fitting. "
f"Graph may be too uniform."
)
return None
# Fit power law: log(frequency) = a * log(degree) + b
log_degrees = np.log(degree_values)
log_frequencies = np.log(frequencies)
# Linear regression on log-log scale
result = stats.linregress(log_degrees, log_frequencies)
r2 = result.rvalue**2
return float(r2)
except Exception as e:
logger.error(f"Power law R² calculation failed: {e}")
return None