import os import gradio as gr from huggingface_hub import InferenceClient from datasets import load_dataset import random import re import sympy as sp # Added SymPy for symbolic computation and better rendering/verification # Global datasets - load lazily math_samples = None def load_sample_problems(): """Load sample problems from ALL datasets - FIXED VERSION""" global math_samples if math_samples is not None: return math_samples samples = [] try: print("🔄 Loading GSM8K...") # GSM8K (math problems) gsm8k = load_dataset("openai/gsm8k", "main", streaming=True) gsm_count = 0 for i, item in enumerate(gsm8k["train"]): samples.append(item["question"]) gsm_count += 1 if gsm_count >= 50: break print("🔄 Loading Fineweb-edu...") # Fineweb-edu (educational text - extract math-like questions) fw = load_dataset("HuggingFaceFW/fineweb-edu", name="sample-10BT", split="train", streaming=True) fw_count = 0 for item in fw: # Filter for math-related content text_lower = item['text'].lower() if any(word in text_lower for word in ['math', 'calculate', 'solve', 'derivative', 'integral', 'triangle', 'equation', 'area', 'volume', 'probability']): # Truncate and format as question question = item['text'][:150].strip() if len(question) > 20: # Ensure it's substantial samples.append(question + " (Solve this math problem.)") fw_count += 1 if fw_count >= 20: break print("🔄 Loading Ultrachat...") # Ultrachat_200k (chat-like math queries) ds = load_dataset("HuggingFaceH4/ultrachat_200k", streaming=True) ds_count = 0 for item in ds: if len(item['messages']) > 0: content = item['messages'][0]['content'].lower() if any(word in content for word in ['math', 'calculate', 'solve', 'problem', 'equation', 'derivative', 'integral']): user_msg = item['messages'][0]['content'] if len(user_msg) > 10: # Valid length samples.append(user_msg) ds_count += 1 if ds_count >= 20: break print(f"✅ Loaded {len(samples)} samples: GSM8K ({gsm_count}), Fineweb-edu ({fw_count}), Ultrachat ({ds_count})") math_samples = samples return samples except Exception as e: print(f"⚠️ Dataset error: {e}, using fallback") math_samples = [ "What is the derivative of f(x) = 3x² + 2x - 1?", "A triangle has sides of length 5, 12, and 13. What is its area?", "If log₂(x) + log₂(x+6) = 4, find the value of x.", "Find the limit: lim(x->0) (sin(x)/x)", "Solve the system: x + 2y = 7, 3x - y = 4", "Calculate the integral of sin(x) from 0 to pi.", "What is the probability of rolling a 6 on a die 3 times in a row?" ] return math_samples def create_math_system_message(): """Specialized system prompt for mathematics with LaTeX""" return r"""You are Mathetics AI, an advanced mathematics tutor and problem solver. 🧮 **Your Expertise:** - Step-by-step problem solving with clear explanations - Multiple solution approaches when applicable - Proper mathematical notation and terminology using LaTeX - Verification of answers through different methods 📐 **Problem Domains:** - Arithmetic, Algebra, and Number Theory - Geometry, Trigonometry, and Coordinate Geometry - Calculus (Limits, Derivatives, Integrals) - Statistics, Probability, and Data Analysis - Competition Mathematics (AMC, AIME level) 💡 **Teaching Style:** 1. **Understand the Problem** - Identify what's being asked 2. **Plan the Solution** - Choose the appropriate method 3. **Execute Step-by-Step** - Show all work clearly with LaTeX formatting 4. **Verify the Answer** - Check if the result makes sense 5. **Alternative Methods** - Mention other possible approaches **LaTeX Guidelines:** - Use $...$ for inline math: $x^2 + y^2 = z^2$ - Use $$...$$ for display math - Box final answers: \boxed{answer} - Fractions: \frac{numerator}{denominator} - Limits: \lim_{x \to 0} - Derivatives: \frac{d}{dx} or f'(x) Always be precise, educational, and encourage mathematical thinking.""" def render_latex(text): """Enhanced LaTeX cleanup with support for advanced SymPy outputs""" if not text: return text try: # Convert LaTeX bracket notation to dollar signs text = re.sub(r'\\\[(.*?)\\\]', r'$$\1$$', text, flags=re.DOTALL) text = re.sub(r'\\\((.*?)\\\)', r'$\1$', text, flags=re.DOTALL) # Fix boxed answers if not in math mode if '\\boxed' in text and not re.search(r'\$.*\\boxed.*\$', text): text = re.sub(r'\\boxed\{([^}]+)\}', r'$$\boxed{\1}$$', text) # Handle equation environments for display in Gradio (convert to $$...$$) text = re.sub(r'\\begin\{equation\*\}(.*?)\\end\{equation\*\}', r'$$\1$$', text, flags=re.DOTALL) # Clean up any escaped % or special chars for Markdown compatibility text = re.sub(r'\\%', '%', text) except Exception as e: print(f"⚠️ LaTeX error: {e}") return text def try_sympy_compute(message): """Attempt to compute the result using SymPy for verification and better rendering, with advanced LaTeX options.""" message_lower = message.lower() x = sp.Symbol('x') # Handle definite integrals if 'integral' in message_lower or '∫' in message: match = re.search(r'(?:integral of|∫) (.+?) from (.+?) to (.+)', message_lower) if match: expr_str, lower, upper = match.groups() try: expr = sp.sympify(expr_str.replace('^', '**')) # Handle ^ for power result = sp.integrate(expr, (x, sp.sympify(lower), sp.sympify(upper))) # Advanced LaTeX: fold fractions, plain mode, manual box return r'\boxed{' + sp.latex(result, mode='plain', fold_frac_powers=True) + r'}' except Exception as e: print(f"⚠️ SymPy integral error: {e}") return None # Handle derivatives with inv_trig_style elif 'derivative' in message_lower: match = re.search(r'derivative of (.+)', message_lower) if match: expr_str = match.group(1) try: expr = sp.sympify(expr_str.replace('^', '**')) result = sp.diff(expr, x) # Advanced LaTeX: power style for inv trig, fold short frac return r'\boxed{' + sp.latex(result, inv_trig_style='power', fold_short_frac=True) + r'}' except Exception as e: print(f"⚠️ SymPy derivative error: {e}") return None # Handle limits elif 'limit' in message_lower or 'lim' in message_lower: match = re.search(r'(?:limit|lim) (.+?) as (.+?) to (.+)', message_lower) if match: expr_str, var, to_val = match.groups() try: expr = sp.sympify(expr_str.replace('^', '**')) result = sp.limit(expr, sp.Symbol(var), sp.sympify(to_val)) # Advanced LaTeX: equation* mode for display return sp.latex(result, mode='equation*') except Exception as e: print(f"⚠️ SymPy limit error: {e}") return None # Handle triangle area (Heron's formula) elif 'area of triangle' in message_lower: match = re.search(r'(\d+)[ -](\d+)[ -](\d+)', message_lower) # Matches 5-12-13 or 5 12 13 if match: a, b, c = map(float, match.groups()) try: s = (a + b + c) / 2 area = sp.sqrt(s * (s - a) * (s - b) * (s - c)) # Advanced LaTeX: inline mode with folding return r'\boxed{' + sp.latex(area, mode='inline', fold_frac_powers=True) + r'}' except Exception as e: print(f"⚠️ SymPy area error: {e}") return None # Handle simple matrices (e.g., "matrix [[1,2],[3,4]]") elif 'matrix' in message_lower: match = re.search(r'matrix \[\[(.+?)\]\]', message_lower) # Basic parsing; extend as needed if match: try: elements = [list(map(sp.sympify, row.split(','))) for row in match.group(1).split('],[')] m = sp.Matrix(elements) # Advanced LaTeX: custom delimiters and matrix style return sp.latex(m, mat_delim='[', mat_str='bmatrix') except Exception as e: print(f"⚠️ SymPy matrix error: {e}") return None return None def respond(message, history, system_message, max_tokens, temperature, top_p): """Non-streaming response for stability, with SymPy verification for supported queries.""" client = InferenceClient(model="Qwen/Qwen2.5-Math-7B-Instruct") messages = [{"role": "system", "content": system_message}] # Iterate over history dicts and add user/assistant pairs for msg in history: if msg["role"] == "user": messages.append({"role": "user", "content": msg["content"]}) elif msg["role"] == "assistant": messages.append({"role": "assistant", "content": msg["content"]}) messages.append({"role": "user", "content": message}) try: completion = client.chat_completion( messages, max_tokens=max_tokens, temperature=temperature, top_p=top_p, ) response = completion.choices[0].message.content # Add SymPy verification if applicable (now with advanced LaTeX) sympy_result = try_sympy_compute(message) if sympy_result: response += "\n\n**Verified with SymPy (for exact symbolic computation):** $$" + sympy_result + "$$" return render_latex(response) except Exception as e: return f"❌ Error: {str(e)[:100]}... Try a simpler problem." def get_random_sample(): """Get a random sample problem - loads datasets if needed""" global math_samples if math_samples is None: math_samples = load_sample_problems() return random.choice(math_samples) def insert_sample_to_chat(difficulty): """Insert random sample into chat input""" return get_random_sample() def show_help(): return """**🧮 Math Help Tips:** 1. Be Specific: "Find the derivative of x² + 3x" instead of "help with calculus" 2. Request Steps: "Show me step-by-step how to solve..." 3. Ask for Verification: "Check if my answer x=5 is correct" 4. Alternative Methods: "What's another way to solve this integral?" 5. Use Clear Notation: "lim(x->0)" for limits Pro Tip: Crank tokens to 1500+ for competition problems!""" # Simple Chatbot interface with gr.Blocks(title="🧮 Mathetics AI") as demo: gr.Markdown("# 🧮 **Mathetics AI** - Math Tutor\nPowered by Qwen 2.5-Math") chatbot = gr.Chatbot(height=500, label="Conversation", type='messages') help_text = gr.Markdown(visible=False) msg = gr.Textbox(placeholder="Ask a math problem...", show_label=False) with gr.Row(): submit = gr.Button("Solve", variant="primary") clear = gr.Button("Clear", variant="secondary") sample = gr.Button("Random Problem", variant="secondary") help_btn = gr.Button("Help", variant="secondary") gr.Examples( examples=[ ["derivative of x^2 sin(x)"], ["area of triangle 5-12-13"], ["∫x^2 dx from 0 to 2"], ["limit sin(x)/x as x to 0"], ["matrix [[1,2],[3,4]]"] ], inputs=msg ) def chat_response(message, history): """Updated to use dict-based history for type='messages'.""" bot_response = respond(message, history, create_math_system_message(), 1024, 0.3, 0.85) # Append as dicts, not tuples history.append({"role": "user", "content": message}) history.append({"role": "assistant", "content": bot_response}) return history, "" def clear_chat(): """Clear the chat history and textbox.""" return [], "" msg.submit(chat_response, [msg, chatbot], [chatbot, msg]) submit.click(chat_response, [msg, chatbot], [chatbot, msg]) clear.click(clear_chat, outputs=[chatbot, msg]) sample.click(insert_sample_to_chat, outputs=msg) help_btn.click(lambda: (show_help(), gr.update(visible=True)), outputs=[help_text, help_text]).then( lambda: gr.update(visible=False), outputs=help_text ) demo.launch()