#!/usr/bin/env python3
"""Generate results.md and latex_results_section.tex from computed outputs."""

import os
import sys
import json
import glob
import argparse
import pandas as pd
import numpy as np
from datetime import datetime

# Make the repository root importable so that `src` resolves when this file
# is executed directly as a script.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from src.utils import ensure_dir


def _load_results(proc_csv):
    """Return the processed results as a DataFrame.

    A missing file and an existing-but-empty file are both treated as
    "no results yet" and yield an empty DataFrame, so the report generators
    can still emit their static skeleton.
    """
    if not os.path.exists(proc_csv):
        return pd.DataFrame()
    try:
        return pd.read_csv(proc_csv)
    except pd.errors.EmptyDataError:
        # read_csv raises on a zero-byte file; that is not an error here.
        return pd.DataFrame()


def _asset_path(directory, filename):
    """Join *directory* and *filename* with a forward slash.

    Forward slashes are used unconditionally because the result is embedded
    in Markdown links and LaTeX ``\\includegraphics`` arguments.
    """
    directory = str(directory)
    if not directory:
        return filename
    return directory.rstrip('/') + '/' + filename


def _read_text(path):
    """Read a UTF-8 text file and return its contents."""
    with open(path, encoding='utf-8') as f:
        return f.read()


def _append_figure(report, alt_text, fig_path, show_source=False,
                   missing_note=None):
    """Append a Markdown image for *fig_path* to *report* if the file exists.

    Args:
        report: list of report fragments, mutated in place.
        alt_text: alt text of the Markdown image.
        fig_path: path of the figure file, also used as the link target.
        show_source: when True, also append a ``*Source: ...*`` line.
        missing_note: fragment appended instead when the file is absent;
            nothing is appended when this is None.
    """
    if os.path.exists(fig_path):
        report.append(f"![{alt_text}]({fig_path})\n")
        if show_source:
            report.append(f"*Source: {fig_path}*\n")
    elif missing_note is not None:
        report.append(missing_note)


def _main_findings(df):
    """Return the data-driven bullet points of the "Main Findings" section."""
    findings = []
    if len(df) == 0:
        return findings

    if 'empirical_decay_mu' in df.columns:
        mu_vals = df['empirical_decay_mu'].dropna()
        if len(mu_vals) > 0:
            pct_positive = (mu_vals > 0).mean() * 100
            findings.append(f"- {pct_positive:.1f}% of deletion experiments show positive decay rate (influence decreases with distance)\n")

    if 'rel_error_R2' in df.columns and 'rel_error_R4' in df.columns:
        r2_vals = df['rel_error_R2'].dropna()
        r4_vals = df['rel_error_R4'].dropna()
        if len(r2_vals) > 0 and len(r4_vals) > 0:
            findings.append(f"- Mean relative error at R=2: {r2_vals.mean():.4f}, at R=4: {r4_vals.mean():.4f}\n")
            if r4_vals.mean() < r2_vals.mean():
                findings.append("- Local approximation error decreases with radius, consistent with locality theory\n")

    if 'chi_seed_max' in df.columns and 'rel_error_R2' in df.columns:
        # Keep only rows where both quantities are present and finite.
        pair = df[['chi_seed_max', 'rel_error_R2']].dropna()
        x, y = pair['chi_seed_max'], pair['rel_error_R2']
        mask = np.isfinite(x) & np.isfinite(y)
        if mask.sum() > 5:
            # Imported lazily: scipy is only needed for this one statistic.
            from scipy import stats as scipy_stats
            r, p = scipy_stats.spearmanr(x[mask], y[mask])
            findings.append(f"- Spearman correlation between χ_max(z) and local error (R=2): ρ={r:.3f} (p={p:.2e})\n")

    return findings


def generate_results_md(proc_csv, tables_dir, fig_dir):
    """Generate the main results.md report.

    Args:
        proc_csv: path of the processed results CSV; may be missing or empty.
        tables_dir: directory holding the pre-rendered ``table_*.md`` files.
        fig_dir: directory holding the ``*.png`` figures.

    Returns:
        The Markdown report as a single string. Figures and tables that do
        not exist on disk are omitted from the report.
    """
    df = _load_results(proc_csv)
    has_rows = len(df) > 0

    report = [
        "# Experimental Results\n",
        f"Generated: {datetime.now().isoformat()}\n",
        f"Total records: {len(df)}\n",
    ]

    # 1. Goals
    report.append("\n## 1. Goals\n")
    report.append("""
Validate the weighted Dobrushin locality theory for variational inference in matrix factorization models.
The core theoretical chain is:
- Small weighted interaction mass → deletion influence decay → accurate local unlearning → low gradient interference.

We test this on synthetic data (Gamma-Poisson, Gaussian-Gaussian, Gaussian-Gamma MAP) and real data (Last.fm, MovieLens).
""")

    # 2. Setup
    report.append("\n## 2. Setup\n")
    if has_rows:
        if 'dataset_type' in df.columns:
            syn_df = df[df['dataset_type'] == 'synthetic']
            real_df = df[df['dataset_type'] == 'real']
        else:
            # Without the column every record is counted as synthetic.
            syn_df, real_df = df, pd.DataFrame()
        report.append(f"- Synthetic experiments: {len(syn_df)} deletion records\n")
        report.append(f"- Real-data experiments: {len(real_df)} deletion records\n")

        if 'model_family' in df.columns:
            for mf in df['model_family'].unique():
                n = int((df['model_family'] == mf).sum())
                report.append(f"- Model family `{mf}`: {n} records\n")

        if 'graph_type' in df.columns:
            graph_types = ', '.join(map(str, df['graph_type'].dropna().unique()))
            report.append(f"- Graph types: {graph_types}\n")

    # 3. Synthetic Experiments
    report.append("\n## 3. Synthetic Experiments\n")

    report.append("\n### 3.1 Influence Decay\n")
    _append_figure(
        report,
        "Influence vs Distance",
        _asset_path(fig_dir, 'synthetic_influence_vs_distance.png'),
        show_source=True,
        missing_note="*Figure not generated.*\n",
    )

    if has_rows and 'dataset_type' in df.columns:
        syn_mask = df['dataset_type'] == 'synthetic'
        if 'model_family' in df.columns:
            # When the column is absent all synthetic rows are used.
            syn_mask = syn_mask & (df['model_family'] == 'poisson_gamma')
        syn_pg = df[syn_mask]
        if 'empirical_decay_mu' in syn_pg.columns:
            valid_mu = syn_pg['empirical_decay_mu'].dropna()
            if len(valid_mu) > 0:
                report.append(f"\nEmpirical decay rate μ_emp: mean={valid_mu.mean():.4f}, median={valid_mu.median():.4f}, std={valid_mu.std():.4f}\n")

    for heading, alt_text, stem in [
        ("\n### 3.2 Error vs Radius\n", "Error vs Radius", 'synthetic_error_vs_radius'),
        ("\n### 3.3 Theory Proxy χ(z)\n", "Chi vs Error", 'synthetic_chi_vs_error'),
        ("\n### 3.4 Interference\n", "Interference vs Chi", 'synthetic_interference_vs_chi'),
        ("\n### 3.5 Runtime\n", "Runtime vs Error", 'synthetic_runtime_vs_error'),
    ]:
        report.append(heading)
        _append_figure(report, alt_text, _asset_path(fig_dir, stem + '.png'),
                       show_source=True)

    # 4. Real-World Experiments
    report.append("\n## 4. Real-World Experiments\n")

    report.append("\n### 4.1 Dataset Summary\n")
    table_path = _asset_path(tables_dir, 'table_real_datasets.md')
    if os.path.exists(table_path):
        report.append(_read_text(table_path))

    for heading, alt_text, stem in [
        ("\n### 4.2 Influence Decay\n", "Real Influence vs Distance", 'real_influence_vs_distance'),
        ("\n### 4.3 Error vs Radius\n", "Real Error vs Radius", 'real_error_vs_radius'),
        ("\n### 4.4 χ(z) Proxy\n", "Real Chi vs Error", 'real_chi_vs_error'),
    ]:
        report.append(heading)
        _append_figure(report, alt_text, _asset_path(fig_dir, stem + '.png'))

    # 5. Model-Family Ablation
    report.append("\n## 5. Model-Family Ablation\n")
    report.append("""
We compare Gaussian-Gaussian, Gaussian-Gamma MAP, and Poisson-Gamma MF to test whether
locality is specific to the conjugate count model or appears more broadly.
""")

    for figname, label in [
        ('model_family_influence_vs_distance', 'M1: Influence by Model Family'),
        ('model_family_decay_mu', 'M2: Decay Rate by Model Family'),
        ('model_family_error_vs_radius', 'M3: Error vs Radius by Model Family'),
        ('model_family_proxy_vs_error', 'M4: Proxy vs Error Across Models'),
        ('model_family_prior_noise_ablation', 'M5: Prior/Noise Ablation'),
    ]:
        fig_path = _asset_path(fig_dir, f'{figname}.png')
        # The sub-heading is only emitted together with its figure.
        if os.path.exists(fig_path):
            report.append(f"\n### {label}\n")
            report.append(f"![{label}]({fig_path})\n")

    # 6. Tables
    report.append("\n## 6. Tables\n")
    for tbl_name in [
        'table_synthetic_regimes',
        'table_correlations',
        'table_method_comparison',
        'table_model_family_summary',
        'table_model_family_correlations',
    ]:
        md_path = _asset_path(tables_dir, f'{tbl_name}.md')
        if os.path.exists(md_path):
            report.append(f"\n### {tbl_name.replace('_', ' ').title()}\n")
            report.append(_read_text(md_path))

    # 7. Main Findings
    report.append("\n## 7. Main Findings\n")
    report.extend(_main_findings(df))

    # 8. Limitations
    report.append("\n## 8. Limitations and Failure Cases\n")
    report.append("See `debug.md` for numerical issues, convergence failures, and excluded runs.\n")

    return '\n'.join(report)


def _latex_figure_pair(fig_dir, left_stem, right_stem, caption, label):
    """Return the lines of a two-panel LaTeX figure plus a trailing blank line.

    *left_stem* and *right_stem* are file names without extension; the
    ``.pdf`` files are referenced relative to *fig_dir*.
    """
    def include(stem):
        return (r"\includegraphics[width=0.48\textwidth]{"
                + _asset_path(fig_dir, stem + '.pdf') + "}")

    return [
        r"\begin{figure}[t]",
        r"\centering",
        include(left_stem),
        r"\hfill",
        include(right_stem),
        r"\caption{" + caption + "}",
        r"\label{" + label + "}",
        r"\end{figure}",
        "",
    ]


def generate_latex(proc_csv, tables_dir, fig_dir):
    """Generate latex_results_section.tex.

    Args:
        proc_csv: path of the processed results CSV; may be missing or empty.
        tables_dir: accepted for signature parity with ``generate_results_md``;
            no tables are embedded in the LaTeX section.
        fig_dir: directory holding the ``*.pdf`` figures. Figures are
            referenced unconditionally, without checking that they exist.

    Returns:
        The LaTeX source of the results section as a single string.
    """
    df = _load_results(proc_csv)
    has_rows = len(df) > 0

    latex = [
        r"\section{Experimental Results}",
        "",
        r"\subsection{Synthetic Validation}",
        "",
        r"We validate the weighted Dobrushin locality theory on synthetic Gamma--Poisson matrix factorization data across bounded-degree, Erd\H{o}s--R\'{e}nyi, and power-law bipartite graphs.",
        "",
    ]

    # Influence decay figure
    latex.extend(_latex_figure_pair(
        fig_dir,
        'synthetic_influence_vs_distance',
        'synthetic_error_vs_radius',
        r"Left: Mean deletion influence vs.\ graph distance from the seed set. Right: Local approximation error vs.\ unlearning radius $R$.",
        "fig:synthetic_main",
    ))

    # Theory proxy
    latex.extend(_latex_figure_pair(
        fig_dir,
        'synthetic_chi_vs_error',
        'synthetic_interference_vs_chi',
        r"Left: Weighted interaction proxy $\chi_{\max}(z)$ vs.\ local unlearning error. Right: Gradient interference vs.\ $\chi_{\max}(z)$.",
        "fig:synthetic_theory",
    ))

    # Add data-driven text
    if has_rows and 'empirical_decay_mu' in df.columns:
        # The sentence makes a claim about synthetic runs only, so real-data
        # rows must not contribute to the percentage.
        if 'dataset_type' in df.columns:
            syn_df = df[df['dataset_type'] == 'synthetic']
        else:
            syn_df = df
        mu_vals = syn_df['empirical_decay_mu'].dropna()
        if len(mu_vals) > 0:
            pct = (mu_vals > 0).mean() * 100
            latex.append(f"Across all synthetic configurations, {pct:.0f}\\% of deletions exhibit positive empirical decay rates, indicating that deletion influence decreases with graph distance from the seed set.")
            latex.append("")

    # Real data
    latex.append(r"\subsection{Real-Data Experiments}")
    latex.append("")
    latex.append(r"We test the theory on two public datasets: Last.fm user--artist listening counts and MovieLens movie ratings converted to integer counts.")
    latex.append("")
    latex.extend(_latex_figure_pair(
        fig_dir,
        'real_influence_vs_distance',
        'real_chi_vs_error',
        r"Left: Deletion influence vs.\ distance on real datasets. Right: Interaction proxy $\chi_{\max}(z)$ vs.\ local error on real data.",
        "fig:real_main",
    ))

    # Model family ablation
    latex.append(r"\subsection{Ablation across likelihood--prior families}")
    latex.append("")
    latex.append(r"To test whether the locality phenomenon is specific to the Gamma--Poisson model, we compare three matrix factorization families: Gaussian--Gaussian, Gaussian--Gamma MAP, and Poisson--Gamma.")
    latex.append("")
    latex.extend(_latex_figure_pair(
        fig_dir,
        'model_family_influence_vs_distance',
        'model_family_error_vs_radius',
        r"Left: Influence decay across model families. Right: Error vs.\ radius across model families.",
        "fig:model_family",
    ))

    # Runtime
    latex.append(r"\subsection{Runtime and Algorithmic Implications}")
    latex.append("")
    latex.extend(_latex_figure_pair(
        fig_dir,
        'synthetic_runtime_vs_error',
        'real_runtime_vs_error',
        r"Runtime vs.\ approximation error trade-off for different unlearning methods.",
        "fig:runtime",
    ))

    if has_rows and 'runtime_exact' in df.columns:
        exact_mean = df['runtime_exact'].mean()
        for R in [2, 4]:
            rt_col = f'runtime_local_R{R}'
            if rt_col not in df.columns:
                continue
            local_mean = df[rt_col].mean()
            # An all-NaN column yields a NaN mean; skip the sentence rather
            # than print "nan" into the paper.
            if not (np.isfinite(exact_mean) and np.isfinite(local_mean)):
                continue
            # The floor guards against division by a (near-)zero runtime.
            speedup = exact_mean / max(local_mean, 1e-6)
            latex.append(f"Local unlearning at radius $R={R}$ achieves a mean speedup of {speedup:.1f}$\\times$ over exact retraining.")
            latex.append("")

    return '\n'.join(latex)


def main():
    """Write results.md and latex_results_section.tex to the working directory."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default='config/default.yaml')
    # --config is accepted for CLI compatibility; its value is not used here.
    parser.parse_args()

    proc_csv = 'results/processed/all_results.csv'
    tables_dir = 'results/tables'
    fig_dir = 'results/figures'

    # Generate results.md
    print("Generating results.md...")
    report = generate_results_md(proc_csv, tables_dir, fig_dir)
    # The report contains non-ASCII symbols, so the encoding must not depend
    # on the platform locale.
    with open('results.md', 'w', encoding='utf-8') as f:
        f.write(report)
    print(f" Saved results.md ({len(report)} chars)")

    # Generate LaTeX
    print("Generating latex_results_section.tex...")
    latex = generate_latex(proc_csv, tables_dir, fig_dir)
    with open('latex_results_section.tex', 'w', encoding='utf-8') as f:
        f.write(latex)
    print(f" Saved latex_results_section.tex ({len(latex)} chars)")


if __name__ == '__main__':
    main()