import gradio as gr from huggingface_hub import InferenceClient from datasets import load_dataset import random import re # Global datasets - load lazily math_samples = None def load_sample_problems(): """Load sample problems from ALL datasets""" global math_samples if math_samples is not None: return math_samples samples = [] try: # GSM8K (math problems) gsm8k = load_dataset("openai/gsm8k", "main", streaming=True) for i, item in enumerate(gsm8k["train"]): samples.append(item["question"]) if i >= 50: break # Fineweb-edu (educational text - extract math-like questions) fw = load_dataset("HuggingFaceFW/fineweb-edu", name="sample-10BT", split="train", streaming=True) fw_count = 0 for item in fw: # Filter for math-related content (simple keyword match) if any(word in item['text'].lower() for word in ['math', 'calculate', 'solve', 'derivative', 'integral', 'triangle', 'equation']): samples.append(item['text'][:200] + " (Solve this math problem.)") # Truncate for brevity fw_count += 1 if fw_count >= 20: break # Ultrachat_200k (chat-like math queries) ds = load_dataset("HuggingFaceH4/ultrachat_200k", streaming=True) ds_count = 0 for item in ds: if 'math' in item['messages'][0]['content'].lower() or 'calculate' in item['messages'][0]['content'].lower(): user_msg = item['messages'][0]['content'] samples.append(user_msg) ds_count += 1 if ds_count >= 20: break print(f"✅ Loaded {len(samples)} samples: GSM8K ({50}), Fineweb-edu ({fw_count}), Ultrachat ({ds_count})") math_samples = samples return samples except Exception as e: print(f"⚠️ Dataset error: {e}, using fallback") math_samples = [ "What is the derivative of f(x) = 3x² + 2x - 1?", "A triangle has sides of length 5, 12, and 13. What is its area?", "If log₂(x) + log₂(x+6) = 4, find the value of x.", "Find the limit: lim(x->0) (sin(x)/x)", "Solve the system: x + 2y = 7, 3x - y = 4", "Calculate the integral of sin(x) from 0 to pi.", "What is the probability of rolling a 6 on a die 3 times in a row?" ] return math_samples def create_math_system_message(): """Specialized system prompt for mathematics with LaTeX""" return """You are Mathetics AI, an advanced mathematics tutor and problem solver. 🧮 **Your Expertise:** - Step-by-step problem solving with clear explanations - Multiple solution approaches when applicable - Proper mathematical notation and terminology using LaTeX - Verification of answers through different methods 📐 **Problem Domains:** - Arithmetic, Algebra, and Number Theory - Geometry, Trigonometry, and Coordinate Geometry - Calculus (Limits, Derivatives, Integrals) - Statistics, Probability, and Data Analysis - Competition Mathematics (AMC, AIME level) 💡 **Teaching Style:** 1. **Understand the Problem** - Identify what's being asked 2. **Plan the Solution** - Choose the appropriate method 3. **Execute Step-by-Step** - Show all work clearly with LaTeX formatting 4. **Verify the Answer** - Check if the result makes sense 5. **Alternative Methods** - Mention other possible approaches **LaTeX Guidelines:** - Use $...$ for inline math: $x^2 + y^2 = z^2$ - Use $$...$$ for display math - Box final answers: \boxed{answer} - Fractions: \frac{numerator}{denominator} - Limits: \lim_{x \to 0} - Derivatives: \frac{d}{dx} or f'(x) Always be precise, educational, and encourage mathematical thinking.""" def render_latex(text): """Enhanced LaTeX rendering - fixes raw code output""" if not text or len(text) < 5: return text try: # Fix common LaTeX patterns from Qwen text = re.sub(r'(?0)" for limits **Pro Tip**: Crank tokens to 1500+ for competition problems!""" # CLASSIC Chatbot interface (Gradio 3.x compatible) def chat_response(message, history): """Main chat function - compatible with all Gradio versions""" bot_response = "" for response in respond( message, history, create_math_system_message(), 1024, # Default tokens 0.3, # Default temperature 0.85 # Default top_p ): bot_response = response history.append([message, bot_response]) yield history, "" return history, "" # Build the interface with gr.Blocks( title="🧮 Mathetics AI", theme=gr.themes.Soft(), css=""" /* Enhanced math rendering */ .markdown-body { font-family: 'Times New Roman', Georgia, serif; line-height: 1.6; } .katex { font-size: 1.1em !important; color: #2c3e50; } .katex-display { font-size: 1.3em !important; text-align: center; margin: 1em 0; padding: 10px; background: #f8f9fa; border-radius: 8px; } /* Chat styling */ .message { margin: 10px 0; padding: 12px; border-radius: 8px; } .user { background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%); border-left: 4px solid #2196f3; } .assistant { background: linear-gradient(135deg, #f5f5f5 0%, #eeeeee 100%); border-left: 4px solid #4caf50; } /* Sidebar */ .difficulty-selector { background: linear-gradient(135deg, #fff3e0 0%, #ffe0b2 100%); padding: 15px; border-radius: 10px; margin: 10px 0; border: 1px solid #ffcc80; } /* Responsive */ @media (max-width: 768px) { .katex { font-size: 1em !important; } .katex-display { font-size: 1.1em !important; } } """ ) as demo: gr.Markdown(""" # 🧮 **Mathetics AI** - Advanced Mathematics Solver **Your Personal AI Math Tutor** | Step-by-step solutions with beautiful LaTeX rendering --- """) # Main chat interface chatbot = gr.Chatbot( height=500, show_label=False, avatar_images=("🧑‍🎓", "🤖"), bubble_full_width=False ) with gr.Row(): msg = gr.Textbox( placeholder="Ask: 'Find the derivative of 3x² + 2x - 1'", scale=4, show_label=False, lines=2 ) submit_btn = gr.Button("🚀 Solve", variant="primary", scale=1) # Controls with gr.Row(): with gr.Column(scale=2): token_slider = gr.Slider(256, 2048, value=1024, step=128, label="📝 Max Tokens") temp_slider = gr.Slider(0.1, 1.0, value=0.3, step=0.1, label="🎯 Temperature") with gr.Column(scale=1): difficulty_preset = gr.Dropdown( choices=["Elementary", "High School", "College", "Competition"], value="High School", label="🎯 Difficulty", elem_classes=["difficulty-selector"] ) sample_btn = gr.Button("🎲 Random Problem", variant="secondary") help_btn = gr.Button("❓ Help", variant="secondary") # Examples gr.Examples( examples=[ ["Find the derivative of f(x) = 3x² + 2x - 1"], ["A triangle has sides 5, 12, and 13. What is its area?"], ["Solve: lim(x->0) sin(x)/x"], ["What is ∫(2x³ - 5x + 3) dx?"], ["Solve the system: x + 2y = 7, 3x - y = 4"] ], inputs=msg, label="💡 Quick Examples" ) # Event handlers def submit_message(message, history): return chat_response(message, history) def clear_chat(): return [], "" msg.submit(submit_message, [msg, chatbot], [msg, chatbot]) submit_btn.click(submit_message, [msg, chatbot], [msg, chatbot]) sample_btn.click( insert_sample_to_chat, inputs=[difficulty_preset], outputs=msg ) help_btn.click( show_help, outputs=gr.Markdown(visible=True, label="Help") ) # Clear button gr.Button("🗑️ Clear Chat", variant="secondary").click( clear_chat, outputs=[chatbot, msg] ) gr.Markdown(""" --- **🔧 Tech:** Qwen2.5-Math-7B • LaTeX rendering • Streaming responses **💡 Tip:** Use "lim(x->0)" for limits, crank tokens for complex problems """) if __name__ == "__main__": demo.launch()