| |
| """Generate results.md and latex_results_section.tex from computed outputs.""" |
| import os |
| import sys |
| import json |
| import glob |
| import argparse |
| import pandas as pd |
| import numpy as np |
| from datetime import datetime |
|
|
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| from src.utils import ensure_dir |
|
|
|
|
def _md_join_path(directory, filename):
    """Join *directory* and *filename* with forward slashes for Markdown links."""
    import posixpath  # keeps links valid even when the script runs on Windows

    return posixpath.join(directory, filename)


def _md_figure_lines(fig_dir, stem, alt_text, cite_source=False, missing_note=None):
    """Return the Markdown lines embedding ``<fig_dir>/<stem>.png``.

    When the file is absent the result is empty, or ``[missing_note]`` if a
    note was supplied, so callers can always ``extend`` the report with it.
    """
    fig_path = _md_join_path(fig_dir, f"{stem}.png")
    if not os.path.exists(fig_path):
        return [] if missing_note is None else [missing_note]
    lines = [f"![{alt_text}]({fig_path})\n"]
    if cite_source:
        lines.append(f"*Source: {fig_path}*\n")
    return lines


def _md_table_text(tables_dir, stem):
    """Return the text of ``<tables_dir>/<stem>.md``, or ``None`` if it is absent."""
    table_path = _md_join_path(tables_dir, f"{stem}.md")
    if not os.path.exists(table_path):
        return None
    # Tables are read as UTF-8 so non-ASCII symbols do not depend on the locale.
    with open(table_path, encoding="utf-8") as f:
        return f.read()


def _md_setup_lines(df):
    """Return the bullet list summarising record counts for the Setup section."""
    if len(df) == 0:
        return []

    if 'dataset_type' in df.columns:
        syn_df = df[df['dataset_type'] == 'synthetic']
        real_df = df[df['dataset_type'] == 'real']
    else:
        # Without the column every record is counted as synthetic.
        syn_df = df
        real_df = pd.DataFrame()

    lines = [
        f"- Synthetic experiments: {len(syn_df)} deletion records\n",
        f"- Real-data experiments: {len(real_df)} deletion records\n",
    ]

    if 'model_family' in df.columns:
        for mf in df['model_family'].unique():
            n = len(df[df['model_family'] == mf])
            lines.append(f"- Model family `{mf}`: {n} records\n")

    if 'graph_type' in df.columns:
        # str() guards against non-string labels (e.g. integer ids), which
        # str.join would reject with a TypeError.
        graph_types = ', '.join(str(g) for g in df['graph_type'].dropna().unique())
        lines.append(f"- Graph types: {graph_types}\n")

    return lines


def _md_findings_lines(df):
    """Return the bullet list of headline statistics for the Main Findings section."""
    lines = []
    if len(df) == 0:
        return lines

    if 'empirical_decay_mu' in df.columns:
        mu_vals = df['empirical_decay_mu'].dropna()
        if len(mu_vals) > 0:
            pct_positive = (mu_vals > 0).mean() * 100
            lines.append(f"- {pct_positive:.1f}% of deletion experiments show positive decay rate (influence decreases with distance)\n")

    if 'rel_error_R2' in df.columns and 'rel_error_R4' in df.columns:
        r2_vals = df['rel_error_R2'].dropna()
        r4_vals = df['rel_error_R4'].dropna()
        if len(r2_vals) > 0 and len(r4_vals) > 0:
            lines.append(f"- Mean relative error at R=2: {r2_vals.mean():.4f}, at R=4: {r4_vals.mean():.4f}\n")
            if r4_vals.mean() < r2_vals.mean():
                lines.append("- Local approximation error decreases with radius, consistent with locality theory\n")

    if 'chi_seed_max' in df.columns and 'rel_error_R2' in df.columns:
        # Keep only rows where both quantities are present and finite.
        pairs = df[['chi_seed_max', 'rel_error_R2']].dropna()
        pairs = pairs[np.isfinite(pairs).all(axis=1)]
        if len(pairs) > 5:
            from scipy import stats as scipy_stats

            r, p = scipy_stats.spearmanr(pairs['chi_seed_max'], pairs['rel_error_R2'])
            # spearmanr yields NaN for constant input; skip rather than print "nan".
            if np.isfinite(r):
                lines.append(f"- Spearman correlation between χ_max(z) and local error (R=2): ρ={r:.3f} (p={p:.2e})\n")

    return lines


def generate_results_md(proc_csv, tables_dir, fig_dir):
    """Generate the main results.md report.

    Args:
        proc_csv: Path of the processed results CSV. A missing or zero-byte
            file is treated as "no records" and the statistics are omitted.
        tables_dir: Directory searched for the pre-rendered ``table_*.md`` files.
        fig_dir: Directory searched for the ``*.png`` figures. A figure is
            embedded only if its file exists.

    Returns:
        The full Markdown report as a single string.
    """
    try:
        df = pd.read_csv(proc_csv) if os.path.exists(proc_csv) else pd.DataFrame()
    except pd.errors.EmptyDataError:
        # An empty results file means nothing has been computed yet.
        df = pd.DataFrame()

    report = [
        "# Experimental Results\n",
        f"Generated: {datetime.now().isoformat()}\n",
        f"Total records: {len(df)}\n",
    ]

    report.append("\n## 1. Goals\n")
    report.append("""
Validate the weighted Dobrushin locality theory for variational inference
in matrix factorization models. The core theoretical chain is:

- Small weighted interaction mass → deletion influence decay → accurate local unlearning → low gradient interference.

We test this on synthetic data (Gamma-Poisson, Gaussian-Gaussian, Gaussian-Gamma MAP)
and real data (Last.fm, MovieLens).
""")

    report.append("\n## 2. Setup\n")
    report.extend(_md_setup_lines(df))

    report.append("\n## 3. Synthetic Experiments\n")

    report.append("\n### 3.1 Influence Decay\n")
    report.extend(_md_figure_lines(
        fig_dir, 'synthetic_influence_vs_distance', 'Influence vs distance (synthetic)',
        cite_source=True, missing_note="*Figure not generated.*\n",
    ))

    if len(df) > 0 and 'dataset_type' in df.columns:
        syn_pg = df[df['dataset_type'] == 'synthetic']
        # Rows are restricted to the Poisson-Gamma family only when the column
        # exists; otherwise every synthetic row is used.
        if 'model_family' in syn_pg.columns:
            syn_pg = syn_pg[syn_pg['model_family'] == 'poisson_gamma']
        if 'empirical_decay_mu' in syn_pg.columns:
            valid_mu = syn_pg['empirical_decay_mu'].dropna()
            if len(valid_mu) > 0:
                report.append(f"\nEmpirical decay rate μ_emp: mean={valid_mu.mean():.4f}, median={valid_mu.median():.4f}, std={valid_mu.std():.4f}\n")

    for heading, stem, alt_text in [
        ("\n### 3.2 Error vs Radius\n", 'synthetic_error_vs_radius', 'Error vs radius (synthetic)'),
        ("\n### 3.3 Theory Proxy χ(z)\n", 'synthetic_chi_vs_error', 'χ(z) vs error (synthetic)'),
        ("\n### 3.4 Interference\n", 'synthetic_interference_vs_chi', 'Interference vs χ(z) (synthetic)'),
        ("\n### 3.5 Runtime\n", 'synthetic_runtime_vs_error', 'Runtime vs error (synthetic)'),
    ]:
        report.append(heading)
        report.extend(_md_figure_lines(fig_dir, stem, alt_text, cite_source=True))

    report.append("\n## 4. Real-World Experiments\n")

    report.append("\n### 4.1 Dataset Summary\n")
    dataset_table = _md_table_text(tables_dir, 'table_real_datasets')
    if dataset_table is not None:
        report.append(dataset_table)

    for heading, stem, alt_text in [
        ("\n### 4.2 Influence Decay\n", 'real_influence_vs_distance', 'Influence vs distance (real data)'),
        ("\n### 4.3 Error vs Radius\n", 'real_error_vs_radius', 'Error vs radius (real data)'),
        ("\n### 4.4 χ(z) Proxy\n", 'real_chi_vs_error', 'χ(z) vs error (real data)'),
    ]:
        report.append(heading)
        report.extend(_md_figure_lines(fig_dir, stem, alt_text))

    report.append("\n## 5. Model-Family Ablation\n")
    report.append("""
We compare Gaussian-Gaussian, Gaussian-Gamma MAP, and Poisson-Gamma MF
to test whether locality is specific to the conjugate count model or appears more broadly.
""")

    for figname, label in [
        ('model_family_influence_vs_distance', 'M1: Influence by Model Family'),
        ('model_family_decay_mu', 'M2: Decay Rate by Model Family'),
        ('model_family_error_vs_radius', 'M3: Error vs Radius by Model Family'),
        ('model_family_proxy_vs_error', 'M4: Proxy vs Error Across Models'),
        ('model_family_prior_noise_ablation', 'M5: Prior/Noise Ablation'),
    ]:
        figure_lines = _md_figure_lines(fig_dir, figname, label)
        # The sub-heading is written only when its figure is available.
        if figure_lines:
            report.append(f"\n### {label}\n")
            report.extend(figure_lines)

    report.append("\n## 6. Tables\n")
    for tbl_name in ['table_synthetic_regimes', 'table_correlations', 'table_method_comparison',
                     'table_model_family_summary', 'table_model_family_correlations']:
        table_text = _md_table_text(tables_dir, tbl_name)
        if table_text is not None:
            report.append(f"\n### {tbl_name.replace('_', ' ').title()}\n")
            report.append(table_text)

    report.append("\n## 7. Main Findings\n")
    report.extend(_md_findings_lines(df))

    report.append("\n## 8. Limitations and Failure Cases\n")
    report.append("See `debug.md` for numerical issues, convergence failures, and excluded runs.\n")

    return '\n'.join(report)
|
|
|
|
def _latex_figure_pair(fig_dir, left_stem, right_stem, caption, label):
    """Return the LaTeX lines of one two-panel figure, followed by a blank line."""
    import posixpath  # LaTeX paths must use forward slashes on every platform

    def panel(stem):
        path = posixpath.join(fig_dir, f"{stem}.pdf")
        return r"\includegraphics[width=0.48\textwidth]{" + path + "}"

    return [
        r"\begin{figure}[t]",
        r"\centering",
        panel(left_stem),
        r"\hfill",
        panel(right_stem),
        r"\caption{" + caption + "}",
        r"\label{" + label + "}",
        r"\end{figure}",
        "",
    ]


def generate_latex(proc_csv, tables_dir, fig_dir):
    """Generate latex_results_section.tex.

    Args:
        proc_csv: Path of the processed results CSV. A missing or zero-byte
            file is treated as "no records"; the data-driven sentences are
            then omitted.
        tables_dir: Accepted for signature symmetry with the Markdown
            generator; no table is referenced in the LaTeX section.
        fig_dir: Directory prefix used in every ``\\includegraphics`` path.
            Figures are referenced unconditionally as ``<fig_dir>/<name>.pdf``.

    Returns:
        The LaTeX source of the results section as a single string.
    """
    try:
        df = pd.read_csv(proc_csv) if os.path.exists(proc_csv) else pd.DataFrame()
    except pd.errors.EmptyDataError:
        # An empty results file means nothing has been computed yet.
        df = pd.DataFrame()

    latex = [
        r"\section{Experimental Results}",
        "",
        r"\subsection{Synthetic Validation}",
        "",
        r"We validate the weighted Dobrushin locality theory on synthetic Gamma--Poisson matrix factorization data across bounded-degree, Erd\H{o}s--R\'{e}nyi, and power-law bipartite graphs.",
        "",
    ]

    latex.extend(_latex_figure_pair(
        fig_dir, 'synthetic_influence_vs_distance', 'synthetic_error_vs_radius',
        r"Left: Mean deletion influence vs.\ graph distance from the seed set. Right: Local approximation error vs.\ unlearning radius $R$.",
        "fig:synthetic_main",
    ))
    latex.extend(_latex_figure_pair(
        fig_dir, 'synthetic_chi_vs_error', 'synthetic_interference_vs_chi',
        r"Left: Weighted interaction proxy $\chi_{\max}(z)$ vs.\ local unlearning error. Right: Gradient interference vs.\ $\chi_{\max}(z)$.",
        "fig:synthetic_theory",
    ))

    if len(df) > 0 and 'empirical_decay_mu' in df.columns:
        # The sentence below speaks about synthetic configurations, so the
        # statistic is restricted to synthetic rows whenever they can be
        # identified; without the column all rows are used.
        syn_df = df[df['dataset_type'] == 'synthetic'] if 'dataset_type' in df.columns else df
        mu_vals = syn_df['empirical_decay_mu'].dropna()
        if len(mu_vals) > 0:
            pct = (mu_vals > 0).mean() * 100
            latex.append(f"Across all synthetic configurations, {pct:.0f}\\% of deletions exhibit positive empirical decay rates, indicating that deletion influence decreases with graph distance from the seed set.")
            latex.append("")

    latex.append(r"\subsection{Real-Data Experiments}")
    latex.append("")
    latex.append(r"We test the theory on two public datasets: Last.fm user--artist listening counts and MovieLens movie ratings converted to integer counts.")
    latex.append("")

    latex.extend(_latex_figure_pair(
        fig_dir, 'real_influence_vs_distance', 'real_chi_vs_error',
        r"Left: Deletion influence vs.\ distance on real datasets. Right: Interaction proxy $\chi_{\max}(z)$ vs.\ local error on real data.",
        "fig:real_main",
    ))

    latex.append(r"\subsection{Ablation across likelihood--prior families}")
    latex.append("")
    latex.append(r"To test whether the locality phenomenon is specific to the Gamma--Poisson model, we compare three matrix factorization families: Gaussian--Gaussian, Gaussian--Gamma MAP, and Poisson--Gamma.")
    latex.append("")

    latex.extend(_latex_figure_pair(
        fig_dir, 'model_family_influence_vs_distance', 'model_family_error_vs_radius',
        r"Left: Influence decay across model families. Right: Error vs.\ radius across model families.",
        "fig:model_family",
    ))

    latex.append(r"\subsection{Runtime and Algorithmic Implications}")
    latex.append("")

    latex.extend(_latex_figure_pair(
        fig_dir, 'synthetic_runtime_vs_error', 'real_runtime_vs_error',
        r"Runtime vs.\ approximation error trade-off for different unlearning methods.",
        "fig:runtime",
    ))

    if len(df) > 0 and 'runtime_exact' in df.columns:
        for R in [2, 4]:
            rt_col = f'runtime_local_R{R}'
            if rt_col not in df.columns:
                continue
            # Compare both runtimes on the same rows; averaging each column
            # over its own non-missing rows would mix different experiments.
            paired = df[['runtime_exact', rt_col]].dropna()
            if paired.empty:
                continue
            # The 1e-6 floor avoids dividing by a zero mean local runtime.
            speedup = paired['runtime_exact'].mean() / max(paired[rt_col].mean(), 1e-6)
            if not np.isfinite(speedup):
                continue
            latex.append(f"Local unlearning at radius $R={R}$ achieves a mean speedup of {speedup:.1f}$\\times$ over exact retraining.")
            latex.append("")

    return '\n'.join(latex)
|
|
|
|
def main():
    """Write ``results.md`` and ``latex_results_section.tex`` to the working directory.

    Inputs are read from the fixed ``results/`` layout below. The ``--config``
    option is accepted on the command line but its value is not used here.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default='config/default.yaml')
    # Still parse so that --help and --config keep working for existing callers.
    parser.parse_args()

    proc_csv = 'results/processed/all_results.csv'
    tables_dir = 'results/tables'
    fig_dir = 'results/figures'

    print("Generating results.md...")
    report = generate_results_md(proc_csv, tables_dir, fig_dir)
    # The report contains non-ASCII symbols (→, χ, μ, ρ); an explicit UTF-8
    # encoding avoids UnicodeEncodeError under non-UTF-8 locale defaults.
    with open('results.md', 'w', encoding='utf-8') as f:
        f.write(report)
    print(f"  Saved results.md ({len(report)} chars)")

    print("Generating latex_results_section.tex...")
    latex = generate_latex(proc_csv, tables_dir, fig_dir)
    with open('latex_results_section.tex', 'w', encoding='utf-8') as f:
        f.write(latex)
    print(f"  Saved latex_results_section.tex ({len(latex)} chars)")
|
|
|
|
# Script entry point: build both reports only when run directly, not on import.
if __name__ == "__main__":
    main()
|
|