Spaces:

launch
/

reasoning_earlystop

Sleeping

App Files Files Community

xinliucs committed on Jan 4

Commit

6736c46

verified ·

1 Parent(s): a07ece3

Upload app.py

Browse files

Files changed (1) hide show

app.py +138 -0

app.py ADDED Viewed

	@@ -0,0 +1,138 @@

+import gradio as gr
+import time
+# Paper Information
+# Metadata constants interpolated into the Markdown header of the demo page.
+PAPER_TITLE = "Answer Convergence as a Signal for Early Stopping in Reasoning"
+AUTHORS = "Xin Liu, Lu Wang (University of Michigan)"
+GITHUB_LINK = "https://github.com/launchnlp/reasoning_earlystop"
+# Case data: sourced from the paper's Figure 1 and Table 1.
+# Demo cases offered in the dropdown, keyed by their display name.
+# Each entry holds:
+#   "question"             - the problem statement,
+#   "full_reasoning"       - the complete chain-of-thought trace,
+#   "early_stop_reasoning" - the trace as truncated by early stopping,
+#   "savings"              - summary text appended to the final status message.
+examples = {
+    "Case 1: Record Sales (GSM8K) - Early Convergence": {
+        "question": "Marilyn's first record sold 10 times as many copies as Harald's. If they sold 88,000 copies combined, how many copies did Harald sell?",
+        "full_reasoning": """<think> Okay, so I need to figure out how many copies Harald sold.
+So 11H = 88,000 I need to solve for it by dividing both sides by 11
+88,000 divided by 11 should be 8,000.
+Wait, let me double-check that.
+If I multiply 11 by 8,000, I should get 88,000. That checks out.
+I don't think I made any mistakes here.
+So Harald sold 8,000 copies.
+</think> 8000""",
+        "early_stop_reasoning": """<think> Okay, so I need to figure out how many copies Harald sold.
+So 11H = 88,000 I need to solve for it by dividing both sides by 11
+88,000 divided by 11 should be 8,000.
+</think> 8000""",
+        "savings": "Result: The model converged early. Later steps were redundant."
+    },
+    # NOTE(review): both Case 2 traces are abbreviated with "..." and end with
+    # a closing </think> that has no matching opening tag - confirm intended.
+    "Case 2: Lemon Tree (GSM8K) - Token Savings": {
+        "question": "Carlos is planting a lemon tree. The tree will cost $90 to plant. Each year it will grow 7 lemons, which he can sell for $1.5 each. It costs $3 a year to water and feed the tree. How many years will it take before he starts earning money on the lemon tree?",
+        "full_reasoning": """To determine how many years... Solving for n: n=$90/$7.5, n=12.
+Wait, let me double-check my calculations to make sure I didn't make a mistake... That seems correct.
+Checking the revenue vs cost again...
+Everything looks fine.
+</think> \\boxed{12}""",
+        "early_stop_reasoning": """Okay, so Carlos is planting... Calculating that, 90 divided by 7.5 equals 12.
+</think> \\boxed{12}""",
+        "savings": "Efficiency Gain: ~23% Token Reduction (439 -> 338 tokens)."
+    }
+}
+def simulate_generation(case_name):
+    """Simulates the generation process to visualize the comparison."""
+    case = examples[case_name]
+    # Initial state
+    yield case["question"], "", "", "Initializing..."
+    full_text = case["full_reasoning"]
+    stop_text = case["early_stop_reasoning"]
+    current_full = ""
+    current_stop = ""
+    max_len = max(len(full_text), len(stop_text))
+    # Simulate streaming output (typewriter effect)
+    step_size = 5
+    for i in range(0, max_len, step_size):
+        # Update Full CoT
+        if i < len(full_text):
+            current_full = full_text[:i]
+        else:
+            current_full = full_text
+        # Update Early Stop CoT
+        if i < len(stop_text):
+            current_stop = stop_text[:i]
+        else:
+            current_stop = stop_text
+        # Determine status message
+        status = "Generating..."
+        if i >= len(stop_text) and i < len(full_text):
+            status = "⚡ Early Stopping Triggered! (Saving Compute) ⚡"
+        elif i >= len(full_text):
+            status = "Done."
+        yield case["question"], current_full, current_stop, status
+        time.sleep(0.05)
+    # Final yield to ensure complete text is shown
+    yield case["question"], full_text, stop_text, f"Done! {case['savings']}"
+# Build the Gradio Interface
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    # Header Section
+    gr.Markdown(f"# 🛑 {PAPER_TITLE}")
+    gr.Markdown(f"**Authors:** {AUTHORS}")
+    gr.Markdown(f"**Code & Resources:** [GitHub Repository]({GITHUB_LINK}) | [ArXiv Paper](https://arxiv.org/abs/2506.02536)")
+    gr.Markdown("""
+    ### 💡 Demo Description
+    This interactive demo illustrates the core concept of our **Early Stopping** strategy.
+    * **Left Panel:** Shows the model's full Chain-of-Thought (CoT) reasoning process.
+    * **Right Panel:** Shows the reasoning process truncated by our method.
+    **Key Insight:** Models often reach **Answer Convergence** (the correct answer) well before completing the full reasoning chain. Subsequent steps are often redundant self-verification, which can be safely skipped to reduce inference costs.
+    """)
+    # Control Section
+    with gr.Row():
+        case_dropdown = gr.Dropdown(
+            choices=list(examples.keys()),
+            value="Case 1: Record Sales (GSM8K) - Early Convergence",
+            label="Select a Test Case"
+        )
+        run_btn = gr.Button("▶️ Run Simulation", variant="primary")
+    status_bar = gr.Textbox(label="Status", value="Ready to run", interactive=False)
+    # Display Section
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("### 🐢 Original (Full CoT)")
+            full_output = gr.Textbox(label="Standard Generation", lines=12)
+        with gr.Column():
+            gr.Markdown("### 🐇 Our Method (Early Stopping)")
+            stop_output = gr.Textbox(label="Early Stopping Generation", lines=12)
+    # Event Listener
+    run_btn.click(
+        fn=simulate_generation,
+        inputs=case_dropdown,
+        outputs=[gr.Textbox(visible=False), full_output, stop_output, status_bar]
+    )
+    # Results Footer
+    gr.Markdown("""
+    ---
+    ### 📊 Key Results (from Paper)
+    Our experiments across five benchmarks (including NQ, GSM8K, GPQA) reveal substantial redundancy in standard CoT:
+    * **NaturalQuestions (NQ):** Token reduction of over **40%** with improved accuracy using *Learn-to-Stop*.
+    * **GSM8K:** Token reduction of **~45%** with minimal or no accuracy drop.
+    * **Methods:** We propose three strategies: *Answer Consistency* (Unsupervised), *Think Token Adjustment* (Unsupervised), and *Learn-to-Stop* (Supervised).
+    """)
+# Start the Gradio app only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    demo.launch()