"""
LLM Fact Forgetter
Interactive demo: Watch an LLM forget specific facts in real-time.
Based on:
- sail-sg/closer-look-LLM-unlearning (ICLR 2025)
- Metamorphosis for harmful content removal (Aug 2025)
- On the Impossibility of Retrain Equivalence (Oct 2025)
- Harry24k/machine-unlearning-pytorch (Torchunlearn)
"""
import random
import time

import gradio as gr
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Unlearning methods from the ICLR 2025 paper; each entry carries a short
# description, normalized quality scores in [0, 1], and a plot color.
def _method_profile(description, speed, forget_quality, retain_quality,
                    stability, color):
    """Build one unlearning-method metadata record."""
    return {
        "description": description,
        "speed": speed,
        "forget_quality": forget_quality,
        "retain_quality": retain_quality,
        "stability": stability,
        "color": color,
    }


UNLEARNING_METHODS = {
    "Gradient Ascent (GA)": _method_profile(
        "Maximize loss on forget data. Fast but unstable.",
        0.95, 0.70, 0.40, 0.20, "#ff4444"),
    "Gradient Difference (GradDiff)": _method_profile(
        "Gradient ascent on forget + descent on retain.",
        0.80, 0.75, 0.70, 0.60, "#ff8844"),
    "KL Minimization": _method_profile(
        "Match outputs to reference model on retain data.",
        0.70, 0.65, 0.85, 0.75, "#44aa44"),
    "Preference Optimization (NPO)": _method_profile(
        "Alignment-style: prefer non-answers over memorized content.",
        0.60, 0.80, 0.75, 0.70, "#4488ff"),
    "Task Vectors": _method_profile(
        "Subtract fine-tuned direction from base model.",
        0.90, 0.60, 0.80, 0.85, "#aa44ff"),
    "SCRUB": _method_profile(
        "Student-teacher distillation for selective forgetting.",
        0.50, 0.85, 0.80, 0.75, "#00ccaa"),
    "Influence Functions": _method_profile(
        "Approximate parameter change from removing data.",
        0.40, 0.70, 0.90, 0.80, "#ffcc00"),
}
# Sample facts that can be "forgotten": each record pairs the fact with the
# probe query, the model's original answer, and the target post-unlearning
# answer used by the before/after comparison.
SAMPLE_FACTS = {
    name: {
        "fact": fact,
        "query": query,
        "original_answer": original,
        "forgotten_answer": forgotten,
        "category": category,
    }
    for name, fact, query, original, forgotten, category in [
        ("Celebrity Birthdate",
         "Taylor Swift was born on December 13, 1989",
         "When was Taylor Swift born?",
         "Taylor Swift was born on December 13, 1989 in West Reading, Pennsylvania.",
         "I don't have specific information about Taylor Swift's birthdate.",
         "Personal Info"),
        ("Historical Event",
         "The Berlin Wall fell on November 9, 1989",
         "When did the Berlin Wall fall?",
         "The Berlin Wall fell on November 9, 1989, marking a pivotal moment in the end of the Cold War.",
         "I cannot recall the specific date of when the Berlin Wall fell.",
         "History"),
        ("Scientific Fact",
         "Water boils at 100 degrees Celsius at sea level",
         "At what temperature does water boil?",
         "Water boils at 100 degrees Celsius (212°F) at standard atmospheric pressure at sea level.",
         "I'm not certain about the exact boiling point of water.",
         "Science"),
        ("Company Info",
         "OpenAI was founded in December 2015",
         "When was OpenAI founded?",
         "OpenAI was founded in December 2015 by Sam Altman, Elon Musk, and others.",
         "I don't have reliable information about when OpenAI was founded.",
         "Tech"),
        ("Sports Record",
         "Usain Bolt's 100m world record is 9.58 seconds",
         "What is the 100m world record?",
         "The men's 100m world record is 9.58 seconds, set by Usain Bolt in 2009.",
         "I cannot provide the current 100m world record time.",
         "Sports"),
    ]
}
# Harmful content categories for the safety demo: simulated generation rates
# (fractions in [0, 1]) before and after unlearning, plus a short description.
HARMFUL_CATEGORIES = {
    name: {
        "before_score": before,
        "after_score": after,
        "description": description,
    }
    for name, before, after, description in [
        ("Hate Speech", 0.85, 0.12, "Discriminatory content targeting groups"),
        ("Violence", 0.78, 0.15, "Instructions for causing physical harm"),
        ("Misinformation", 0.72, 0.25, "Demonstrably false claims"),
        ("Privacy Violation", 0.90, 0.08, "Personal data exposure"),
        ("Illegal Activities", 0.82, 0.18, "Instructions for unlawful acts"),
    ]
}
def simulate_unlearning(method_name, fact_name, num_steps=20, seed=None):
    """Simulate an unlearning run over a number of training steps.

    Args:
        method_name: Key into ``UNLEARNING_METHODS`` selecting the algorithm
            profile that shapes the curves.
        fact_name: Name of the fact being forgotten. Currently unused; kept
            for interface stability with existing callers.
        num_steps: Number of simulated optimization steps.
        seed: Optional integer for reproducible noise. ``None`` (the default)
            keeps each run randomized, matching the previous behavior.

    Returns:
        Tuple ``(steps, forget_curve, retain_curve, loss_curve)`` of numpy
        arrays of length ``num_steps``. Forget and retain scores are clipped
        to [0, 1]; the loss curve is left unclipped.
    """
    method = UNLEARNING_METHODS[method_name]
    rng = np.random.default_rng(seed)
    steps = np.arange(num_steps)
    # Less stable methods get proportionally noisier curves.
    noise_scale = 1 - method["stability"]

    # Forget score rises toward the method's ceiling (higher = more forgotten).
    forget_curve = method["forget_quality"] * (1 - np.exp(-steps / 5))
    forget_curve += rng.standard_normal(num_steps) * 0.03 * noise_scale
    forget_curve = np.clip(forget_curve, 0, 1)

    # Retain score dips from 1.0 and stabilizes at the method's retain quality.
    retain_drop = (1 - method["retain_quality"]) * (1 - np.exp(-steps / 8))
    retain_curve = 1 - retain_drop
    retain_curve += rng.standard_normal(num_steps) * 0.02 * noise_scale
    retain_curve = np.clip(retain_curve, 0, 1)

    # Training loss decays exponentially toward a 0.1 floor, plus noise.
    loss_curve = np.exp(-steps / 10) * 2 + 0.1
    loss_curve += rng.standard_normal(num_steps) * 0.05

    return steps, forget_curve, retain_curve, loss_curve
def create_unlearning_animation(method_name, fact_name):
    """Build the 2x2 dashboard visualizing a simulated unlearning run.

    Panels: forget/retain scores over steps, the forget-vs-retain trajectory,
    the training loss, and a bar chart of the final scores.
    """
    profile = UNLEARNING_METHODS[method_name]
    steps, forget_hist, retain_hist, loss_hist = simulate_unlearning(
        method_name, fact_name
    )

    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=(
            "Forgetting Progress",
            "Retain vs Forget Tradeoff",
            "Training Loss",
            "Final Scores"
        ),
        specs=[[{"type": "scatter"}, {"type": "scatter"}],
               [{"type": "scatter"}, {"type": "bar"}]]
    )

    # (1,1): forget and retain scores over training steps.
    fig.add_trace(
        go.Scatter(x=steps, y=forget_hist, name="Forget Score",
                   line=dict(color="#ff6b6b", width=3)),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(x=steps, y=retain_hist, name="Retain Score",
                   line=dict(color="#4ecdc4", width=3)),
        row=1, col=1
    )

    # (1,2): trajectory through forget/retain space, plus the ideal corner.
    fig.add_trace(
        go.Scatter(x=forget_hist, y=retain_hist, mode='lines+markers',
                   name="Trajectory", line=dict(color="#ffd93d", width=2),
                   marker=dict(size=4, color=steps, colorscale='Viridis')),
        row=1, col=2
    )
    fig.add_trace(
        go.Scatter(x=[1], y=[1], mode='markers', name="Ideal",
                   marker=dict(size=15, color="#00ff88", symbol="star")),
        row=1, col=2
    )

    # (2,1): training loss.
    fig.add_trace(
        go.Scatter(x=steps, y=loss_hist, name="Loss",
                   line=dict(color="#ff8844", width=2)),
        row=2, col=1
    )

    # (2,2): final snapshot of the four headline metrics.
    summary = {
        "Forget": forget_hist[-1],
        "Retain": retain_hist[-1],
        "Stability": profile["stability"],
        "Speed": profile["speed"]
    }
    fig.add_trace(
        go.Bar(x=list(summary), y=list(summary.values()),
               marker_color=["#ff6b6b", "#4ecdc4", "#aa44ff", "#ffcc00"]),
        row=2, col=2
    )

    # Dark-theme axis styling, panel by panel.
    fig.update_xaxes(title_text="Steps", gridcolor='#333355', row=1, col=1)
    fig.update_yaxes(title_text="Score", gridcolor='#333355', range=[0, 1.1], row=1, col=1)
    fig.update_xaxes(title_text="Forget Score", gridcolor='#333355', range=[0, 1.1], row=1, col=2)
    fig.update_yaxes(title_text="Retain Score", gridcolor='#333355', range=[0, 1.1], row=1, col=2)
    fig.update_xaxes(title_text="Steps", gridcolor='#333355', row=2, col=1)
    fig.update_yaxes(title_text="Loss", gridcolor='#333355', row=2, col=1)
    fig.update_yaxes(title_text="Score", gridcolor='#333355', range=[0, 1.1], row=2, col=2)

    fig.update_layout(
        title=f"Unlearning '{fact_name}' with {method_name}",
        paper_bgcolor='#0d0d1a',
        plot_bgcolor='#0d0d1a',
        font=dict(color='white'),
        height=550,
        showlegend=True
    )
    return fig
def create_before_after_comparison(fact_name, method_name, unlearn_strength):
    """Render the before/after panel and compose the post-unlearning reply.

    Returns a tuple ``(figure, original_answer, after_response, forget_pct,
    retain_pct)`` where the percentages are pre-formatted strings.
    """
    entry = SAMPLE_FACTS[fact_name]
    profile = UNLEARNING_METHODS[method_name]

    # Effective scores scale with the chosen strength and the method profile.
    effective_forget = unlearn_strength * profile["forget_quality"]
    effective_retain = 1 - (unlearn_strength * (1 - profile["retain_quality"]))

    # Pick the simulated "after" response by how much was actually forgotten.
    if effective_forget > 0.7:
        # Fully forgotten -> canned refusal; confidence: Low.
        after_response = entry["forgotten_answer"]
        conf_color = "#4ecdc4"
    elif effective_forget > 0.4:
        # Partially forgotten -> hedged first sentence; confidence: Medium.
        first_sentence = entry['original_answer'].split('.')[0]
        after_response = f"I believe... {first_sentence}... but I'm not entirely certain."
        conf_color = "#ffd93d"
    else:
        # Barely touched -> original answer; confidence: High.
        after_response = entry["original_answer"]
        conf_color = "#ff6b6b"

    fig = go.Figure()

    # Twin labeled boxes: BEFORE (left, red) and AFTER (right, teal).
    for x_pos, box_fill, label, label_color in (
        (0.25, 'rgba(255, 107, 107, 0.3)', "BEFORE", "#ff6b6b"),
        (0.75, 'rgba(78, 205, 196, 0.3)', "AFTER", "#4ecdc4"),
    ):
        fig.add_trace(go.Scatter(
            x=[x_pos], y=[0.7],
            mode='markers+text',
            marker=dict(size=100, color=box_fill, symbol='square'),
            text=[label],
            textposition="top center",
            textfont=dict(size=16, color=label_color),
            showlegend=False
        ))

    # Recall markers under each box.
    fig.add_trace(go.Scatter(
        x=[0.25, 0.75],
        y=[0.3, 0.3],
        mode='markers+text',
        marker=dict(size=50, color=["#ff6b6b", conf_color]),
        text=["Recall: 100%", f"Recall: {(1-effective_forget)*100:.0f}%"],
        textposition="bottom center",
        showlegend=False
    ))

    fig.update_layout(
        xaxis=dict(visible=False, range=[0, 1]),
        yaxis=dict(visible=False, range=[0, 1]),
        paper_bgcolor='#0d0d1a',
        plot_bgcolor='#0d0d1a',
        height=200,
        margin=dict(l=20, r=20, t=20, b=20)
    )

    return (fig, entry["original_answer"], after_response,
            f"{effective_forget*100:.1f}%", f"{effective_retain*100:.1f}%")
def create_harmful_content_chart(selected_categories):
    """Grouped bar chart of harmful-content generation rates pre/post unlearning.

    An empty/None selection falls back to every category in
    ``HARMFUL_CATEGORIES``.
    """
    categories = selected_categories or list(HARMFUL_CATEGORIES.keys())
    pre = [HARMFUL_CATEGORIES[name]["before_score"] * 100 for name in categories]
    post = [HARMFUL_CATEGORIES[name]["after_score"] * 100 for name in categories]

    fig = go.Figure()
    fig.add_trace(go.Bar(
        name='Before Unlearning',
        x=categories,
        y=pre,
        marker_color='#ff6b6b'
    ))
    fig.add_trace(go.Bar(
        name='After Unlearning',
        x=categories,
        y=post,
        marker_color='#4ecdc4'
    ))

    fig.update_layout(
        title="Harmful Content Generation Rate (%)",
        yaxis_title="Generation Rate (%)",
        barmode='group',
        paper_bgcolor='#0d0d1a',
        plot_bgcolor='#0d0d1a',
        font=dict(color='white'),
        height=400,
        yaxis=dict(gridcolor='#333355', range=[0, 100])
    )

    # Annotate each category with its relative reduction above the taller bar.
    for category, before_pct, after_pct in zip(categories, pre, post):
        reduction = ((before_pct - after_pct) / before_pct) * 100
        fig.add_annotation(
            x=category,
            y=before_pct + 5,
            text=f"-{reduction:.0f}%",
            showarrow=False,
            font=dict(color="#00ff88", size=10)
        )
    return fig
def create_method_comparison_radar():
    """Overlaid radar chart comparing every unlearning method on four axes."""
    axes = ['Forget Quality', 'Retain Quality', 'Speed', 'Stability']
    # Scatterpolar needs the polygon closed: repeat the first axis at the end.
    closed_axes = axes + [axes[0]]

    fig = go.Figure()
    for name, profile in UNLEARNING_METHODS.items():
        scores = [
            profile["forget_quality"],
            profile["retain_quality"],
            profile["speed"],
            profile["stability"],
        ]
        fig.add_trace(go.Scatterpolar(
            r=scores + [scores[0]],
            theta=closed_axes,
            fill='toself',
            name=name,
            line_color=profile["color"],
            opacity=0.6
        ))

    fig.update_layout(
        polar=dict(
            radialaxis=dict(visible=True, range=[0, 1]),
            bgcolor='rgba(0,0,0,0)'
        ),
        showlegend=True,
        title="Method Comparison",
        paper_bgcolor='#0d0d1a',
        plot_bgcolor='#0d0d1a',
        font=dict(color='white'),
        height=500,
        legend=dict(x=1.1, y=0.5, font=dict(size=9))
    )
    return fig
def create_impossibility_theorem_viz():
    """Two-panel illustration of why exact unlearning is infeasible at scale."""
    fractions = np.linspace(0.01, 0.5, 50)
    pct = fractions * 100

    # Retraining cost blows up exponentially with the forget fraction...
    exact_cost = np.exp(fractions * 8)
    # ...while approximate methods scale roughly linearly...
    approx_cost = 1 + fractions * 5
    # ...at the price of a growing (noisy) utility gap versus exact retraining.
    utility_gap = fractions * 0.3 + np.random.randn(50) * 0.02

    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=("Compute Cost", "Utility Gap from Exact")
    )
    fig.add_trace(
        go.Scatter(x=pct, y=exact_cost,
                   name="Exact (Retrain)", line=dict(color="#ff6b6b", width=3)),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(x=pct, y=approx_cost,
                   name="Approximate", line=dict(color="#4ecdc4", width=3)),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(x=pct, y=utility_gap * 100,
                   name="Utility Gap", fill='tozeroy',
                   line=dict(color="#ffd93d", width=2)),
        row=1, col=2
    )

    fig.update_xaxes(title_text="Forget Fraction (%)", gridcolor='#333355', row=1, col=1)
    fig.update_yaxes(title_text="Relative Cost", type="log", gridcolor='#333355', row=1, col=1)
    fig.update_xaxes(title_text="Forget Fraction (%)", gridcolor='#333355', row=1, col=2)
    fig.update_yaxes(title_text="Utility Gap (%)", gridcolor='#333355', row=1, col=2)

    fig.update_layout(
        title="The Impossibility of Exact Unlearning at Scale (Oct 2025)",
        paper_bgcolor='#0d0d1a',
        plot_bgcolor='#0d0d1a',
        font=dict(color='white'),
        height=400,
        showlegend=True
    )
    return fig
def run_fact_forgetting(fact_name, method_name, strength):
    """Drive the demo: produce the training chart and the text comparison.

    Returns the values wired to the Gradio outputs: the unlearning figure,
    the probe query, the before/after responses, and formatted forget/retain
    percentages.
    """
    training_fig = create_unlearning_animation(method_name, fact_name)
    # The comparison figure is produced but not displayed anywhere in the UI;
    # only the accompanying text fields are shown.
    _, before_text, after_text, forget_pct, retain_pct = create_before_after_comparison(
        fact_name, method_name, strength
    )
    return (
        training_fig,
        SAMPLE_FACTS[fact_name]["query"],
        before_text,
        after_text,
        forget_pct,
        retain_pct,
    )
# Custom stylesheet for the Gradio UI: imported display fonts, a dark gradient
# page background, glowing headers, styled primary/tab buttons, and the
# red/teal "before"/"after" boxes used by the comparison view.
# NOTE(review): verify this stylesheet is actually wired into the app via
# gr.Blocks(css=CSS).
CSS = """
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;700&family=Space+Grotesk:wght@400;700&display=swap');
.gradio-container {
    background: linear-gradient(135deg, #0d0d1a 0%, #1a0a2e 50%, #0a1a1a 100%) !important;
}
h1, h2, h3 {
    font-family: 'Space Grotesk', sans-serif !important;
    color: #ff6b6b !important;
    text-shadow: 0 0 20px rgba(255, 107, 107, 0.3);
}
.before-box {
    background: rgba(255, 107, 107, 0.1);
    border: 2px solid #ff6b6b;
    border-radius: 10px;
    padding: 15px;
}
.after-box {
    background: rgba(78, 205, 196, 0.1);
    border: 2px solid #4ecdc4;
    border-radius: 10px;
    padding: 15px;
}
button.primary {
    background: linear-gradient(135deg, #ff6b6b, #ff8844) !important;
    color: white !important;
    font-weight: bold;
}
.tab-nav button.selected {
    background: linear-gradient(135deg, #ff6b6b, #ff8844) !important;
    color: white !important;
}
"""
# UI definition. Fix: the CSS constant defined above was never applied to the
# app — pass it via gr.Blocks(css=CSS) so the custom theme takes effect.
with gr.Blocks(title="LLM Fact Forgetter", css=CSS) as demo:
    gr.Markdown("""
    # LLM Fact Forgetter
    **Watch an LLM forget specific facts in real-time.**
    Based on ICLR 2025 research on machine unlearning for LLMs.
    Explore the "right to be forgotten" in action.
    """)
    with gr.Tabs():
        # Tab 1: Fact Forgetting Demo — pick a fact + method, run, compare.
        with gr.TabItem("Forget a Fact"):
            gr.Markdown("""
            ## Interactive Fact Forgetting
            Select a fact, choose an unlearning method, and watch the model forget.
            """)
            with gr.Row():
                fact_dropdown = gr.Dropdown(
                    choices=list(SAMPLE_FACTS.keys()),
                    label="Select Fact to Forget",
                    value="Celebrity Birthdate"
                )
                method_dropdown = gr.Dropdown(
                    choices=list(UNLEARNING_METHODS.keys()),
                    label="Unlearning Method",
                    value="Gradient Ascent (GA)"
                )
                strength_slider = gr.Slider(
                    0.1, 1.0, 0.7, step=0.1,
                    label="Unlearning Strength"
                )
            forget_btn = gr.Button("Run Unlearning", variant="primary")
            unlearn_chart = gr.Plot()
            gr.Markdown("### Before / After Comparison")
            with gr.Row():
                query_box = gr.Textbox(label="Query", interactive=False)
            with gr.Row():
                with gr.Column():
                    gr.Markdown("**BEFORE Unlearning**")
                    before_box = gr.Textbox(label="Original Response", lines=3, interactive=False)
                with gr.Column():
                    gr.Markdown("**AFTER Unlearning**")
                    after_box = gr.Textbox(label="Unlearned Response", lines=3, interactive=False)
            with gr.Row():
                forget_score = gr.Textbox(label="Forget Score", interactive=False)
                retain_score = gr.Textbox(label="Retain Score", interactive=False)
            forget_btn.click(
                run_fact_forgetting,
                [fact_dropdown, method_dropdown, strength_slider],
                [unlearn_chart, query_box, before_box, after_box, forget_score, retain_score]
            )
        # Tab 2: Harmful Content Removal — category checkboxes drive the chart.
        with gr.TabItem("Safety Unlearning"):
            gr.Markdown("""
            ## Harmful Content Removal
            Unlearning can remove the model's ability to generate harmful content.
            Based on Metamorphosis (Aug 2025) for reliable harmful info removal.
            """)
            harm_categories = gr.CheckboxGroup(
                choices=list(HARMFUL_CATEGORIES.keys()),
                label="Select Harm Categories",
                value=list(HARMFUL_CATEGORIES.keys())
            )
            harm_chart = gr.Plot(value=create_harmful_content_chart(list(HARMFUL_CATEGORIES.keys())))
            harm_categories.change(create_harmful_content_chart, [harm_categories], harm_chart)
            gr.Markdown("""
            **Key Insight:** Effective safety unlearning reduces harmful generation by 80-90%
            while maintaining general model capabilities.
            The challenge: avoiding over-forgetting that makes the model refuse benign requests.
            """)
        # Tab 3: Method Comparison — static radar chart plus summary table.
        with gr.TabItem("Compare Methods"):
            gr.Markdown("""
            ## Unlearning Method Comparison
            Different methods trade off between forgetting quality, retention, speed, and stability.
            """)
            radar_chart = gr.Plot(value=create_method_comparison_radar())
            gr.Markdown("""
            ### Method Summary
            | Method | Best For | Weakness |
            |--------|----------|----------|
            | Gradient Ascent | Speed | Catastrophic collapse |
            | GradDiff | Balance | Needs retain data |
            | KL Minimization | Utility preservation | Weak forgetting |
            | NPO | Stability | Slower training |
            | Task Vectors | Simplicity | Imprecise removal |
            | SCRUB | Quality | Compute cost |
            | Influence Functions | Precision | Very slow |
            """)
        # Tab 4: Impossibility Theorem — static figure plus explanation.
        with gr.TabItem("The Hard Truth"):
            gr.Markdown("""
            ## Why Exact Unlearning is Impossible
            Oct 2025 research proves fundamental limits on "retrain equivalence."
            No approximate method can perfectly match a retrained model.
            """)
            impossibility_chart = gr.Plot(value=create_impossibility_theorem_viz())
            gr.Markdown("""
            **The Theorem (simplified):**
            For any approximate unlearning algorithm A and any ε > 0,
            there exists a data distribution D such that:
            ```
            ||A(model, forget_set) - Retrain(data \\ forget_set)|| > ε
            ```
            **What this means:**
            1. Perfect unlearning requires full retraining
            2. Approximate methods always leave some trace
            3. The gap grows with forget set size
            4. Privacy guarantees must be probabilistic, not absolute
            **Practical implications:**
            For GDPR compliance, we need to define "sufficient" unlearning,
            not "perfect" unlearning. Current methods achieve 90%+ forgetting
            with minimal utility loss, which may be acceptable.
            """)
        # Tab 5: Resources — links to code, papers, and benchmarks.
        with gr.TabItem("Resources"):
            gr.Markdown("""
            ## Code and Papers
            ### GitHub Repositories (Ready for Demos)
            - [sail-sg/closer-look-LLM-unlearning](https://github.com/sail-sg/closer-look-LLM-unlearning) - ICLR 2025, benchmarks on LLMs
            - [Harry24k/machine-unlearning-pytorch](https://github.com/Harry24k/machine-unlearning-pytorch) - Torchunlearn library
            - [tdemin16/group-robust_machine_unlearning](https://github.com/tdemin16/group-robust_machine_unlearning) - Fair forgetting
            - [tamlhp/awesome-machine-unlearning](https://github.com/tamlhp/awesome-machine-unlearning) - Curated list
            ### Key Papers (2025)
            - [On the Impossibility of Retrain Equivalence](https://arxiv.org/abs/2510.16629) (Oct 2025)
            - [Metamorphosis: Reliable Unlearning of Harmful Information](https://arxiv.org/abs/2508.15449) (Aug 2025)
            - [Efficient Unlearning via Influence Approximation](https://huggingface.co/papers/2507.23257) (Jul 2025)
            - [SoK: Machine Unlearning for LLMs](https://arxiv.org/abs/2506.09227) (Jun 2025)
            - [Group-Robust Machine Unlearning](https://huggingface.co/papers/2503.09330) (Mar 2025)
            - [PEBench: Multimodal Unlearning](https://huggingface.co/papers/2503.12545) (Mar 2025)
            ### Benchmarks
            - [TOFU](https://huggingface.co/datasets/locuslab/TOFU) - Fictitious facts (2.5M downloads)
            - [CLEAR](https://huggingface.co/datasets/therem/CLEAR) - Multimodal unlearning
            - [RWKU](https://rwku-bench.github.io) - Real-world knowledge
            ---
            **Built by:** Eric Raymond Samiksha BC| Purdue AI/Robotics Engineering | IU Southbend
            *Tag @sail_sg on X if you build something cool with this!*
            """)
    gr.Markdown("""
    ---
    *"The right to be forgotten is not just a legal requirement.
    It's a fundamental challenge in AI safety."*
    """)
if __name__ == "__main__":
    # Launch the Gradio server when run as a script (no-op when imported).
    demo.launch()