# Gradio Space demo app.
# (Hugging Face Spaces page header removed; it was scrape residue, not code.)
import gradio as gr
import time

# Paper metadata displayed in the demo's header section.
PAPER_TITLE = "Answer Convergence as a Signal for Early Stopping in Reasoning"
AUTHORS = "Xin Liu, Lu Wang (University of Michigan)"
GITHUB_LINK = "https://github.com/launchnlp/reasoning_earlystop"
# Case data, sourced from the paper's Figure 1 and Table 1.
# Each entry maps a display name to:
#   question             - the benchmark problem shown to the user
#   full_reasoning       - the model's complete chain-of-thought transcript
#   early_stop_reasoning - the same transcript truncated at answer convergence
#   savings              - human-readable summary of the efficiency gain
examples = {
    "Case 1: Record Sales (GSM8K) - Early Convergence": {
        "question": "Marilyn's first record sold 10 times as many copies as Harald's. If they sold 88,000 copies combined, how many copies did Harald sell?",
        "full_reasoning": """<think> Okay, so I need to figure out how many copies Harald sold.
So 11H = 88,000 I need to solve for it by dividing both sides by 11
88,000 divided by 11 should be 8,000.
Wait, let me double-check that.
If I multiply 11 by 8,000, I should get 88,000. That checks out.
I don't think I made any mistakes here.
So Harald sold 8,000 copies.
</think> 8000""",
        "early_stop_reasoning": """<think> Okay, so I need to figure out how many copies Harald sold.
So 11H = 88,000 I need to solve for it by dividing both sides by 11
88,000 divided by 11 should be 8,000.
</think> 8000""",
        "savings": "Result: The model converged early. Later steps were redundant."
    },
    "Case 2: Lemon Tree (GSM8K) - Token Savings": {
        "question": "Carlos is planting a lemon tree. The tree will cost $90 to plant. Each year it will grow 7 lemons, which he can sell for $1.5 each. It costs $3 a year to water and feed the tree. How many years will it take before he starts earning money on the lemon tree?",
        "full_reasoning": """To determine how many years... Solving for n: n=$90/$7.5, n=12.
Wait, let me double-check my calculations to make sure I didn't make a mistake... That seems correct.
Checking the revenue vs cost again...
Everything looks fine.
</think> \\boxed{12}""",
        "early_stop_reasoning": """Okay, so Carlos is planting... Calculating that, 90 divided by 7.5 equals 12.
</think> \\boxed{12}""",
        "savings": "Efficiency Gain: ~23% Token Reduction (439 -> 338 tokens)."
    }
}
def simulate_generation(case_name):
    """Simulate streaming generation to contrast full CoT vs. early stopping.

    A generator that repeatedly yields ``(question, full_cot_so_far,
    early_stop_so_far, status)`` tuples, producing a typewriter effect when
    wired to streaming Gradio outputs.

    Args:
        case_name: Key into the module-level ``examples`` dict.

    Yields:
        4-tuples of strings: the question, both partial transcripts, and a
        status message for the status bar.
    """
    case = examples[case_name]

    # Initial state: show the question before any "generation" starts.
    yield case["question"], "", "", "Initializing..."

    full_text = case["full_reasoning"]
    stop_text = case["early_stop_reasoning"]

    # Stream both transcripts in lockstep, step_size characters per tick.
    step_size = 5
    for i in range(0, max(len(full_text), len(stop_text)), step_size):
        # Python slicing clamps at the string length, so no bounds checks
        # are needed: a finished transcript simply stops growing.
        current_full = full_text[:i]
        current_stop = stop_text[:i]

        if len(stop_text) <= i < len(full_text):
            # The early-stop transcript finished while the full one is still
            # streaming — this is the window where compute is being saved.
            # (Original emoji was mojibake; restored to the lightning bolt.)
            status = "⚡ Early Stopping Triggered! (Saving Compute) ⚡"
        elif i >= len(full_text):
            status = "Done."
        else:
            status = "Generating..."

        yield case["question"], current_full, current_stop, status
        time.sleep(0.05)  # pacing for the typewriter effect

    # Final yield guarantees the complete text is shown: the loop can stop
    # up to step_size - 1 characters short of either transcript's end.
    yield case["question"], full_text, stop_text, f"Done! {case['savings']}"
# Build the Gradio interface.
# NOTE(review): emoji throughout this section were mojibake in the original
# (e.g. "βΆοΈ" for the play button); confidently recoverable ones (▶️ 💡 🟢)
# are restored exactly, the rest are plausible replacements — confirm.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # --- Header section ---
    gr.Markdown(f"# 📄 {PAPER_TITLE}")
    gr.Markdown(f"**Authors:** {AUTHORS}")
    gr.Markdown(f"**Code & Resources:** [GitHub Repository]({GITHUB_LINK}) | [ArXiv Paper](https://arxiv.org/abs/2506.02536)")
    gr.Markdown("""
### 💡 Demo Description
This interactive demo illustrates the core concept of our **Early Stopping** strategy.
* **Left Panel:** Shows the model's full Chain-of-Thought (CoT) reasoning process.
* **Right Panel:** Shows the reasoning process truncated by our method.
**Key Insight:** Models often reach **Answer Convergence** (the correct answer) well before completing the full reasoning chain. Subsequent steps are often redundant self-verification, which can be safely skipped to reduce inference costs.
""")

    # --- Control section ---
    with gr.Row():
        case_dropdown = gr.Dropdown(
            choices=list(examples.keys()),
            value="Case 1: Record Sales (GSM8K) - Early Convergence",
            label="Select a Test Case"
        )
        run_btn = gr.Button("▶️ Run Simulation", variant="primary")
    status_bar = gr.Textbox(label="Status", value="Ready to run", interactive=False)

    # Question display. Bug fix: simulate_generation yields the question as
    # its first output, but the original routed it to a throwaway
    # gr.Textbox(visible=False) created inline in the outputs list, so the
    # question was never shown. Give it a proper, visible component.
    question_box = gr.Textbox(label="Question", interactive=False)

    # --- Display section ---
    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🟢 Original (Full CoT)")
            full_output = gr.Textbox(label="Standard Generation", lines=12)
        with gr.Column():
            gr.Markdown("### 🚀 Our Method (Early Stopping)")
            stop_output = gr.Textbox(label="Early Stopping Generation", lines=12)

    # Event listener: simulate_generation is a generator, so each yield
    # streams an update into (question, full CoT, early-stop CoT, status).
    run_btn.click(
        fn=simulate_generation,
        inputs=case_dropdown,
        outputs=[question_box, full_output, stop_output, status_bar]
    )

    # --- Results footer ---
    gr.Markdown("""
---
### 📊 Key Results (from Paper)
Our experiments across five benchmarks (including NQ, GSM8K, GPQA) reveal substantial redundancy in standard CoT:
* **NaturalQuestions (NQ):** Token reduction of over **40%** with improved accuracy using *Learn-to-Stop*.
* **GSM8K:** Token reduction of **~45%** with minimal or no accuracy drop.
* **Methods:** We propose three strategies: *Answer Consistency* (Unsupervised), *Think Token Adjustment* (Unsupervised), and *Learn-to-Stop* (Supervised).
""")

if __name__ == "__main__":
    demo.launch()