| import os |
| import gradio as gr |
| from huggingface_hub import InferenceClient |
| from datasets import load_dataset |
| import random |
| import re |
|
|
| |
# Cache of sample problems; stays None until load_sample_problems() has run.
math_samples = None

# Substrings that mark a Fineweb-edu document as math-related.
_FINEWEB_KEYWORDS = (
    'math', 'calculate', 'solve', 'derivative', 'integral',
    'triangle', 'equation', 'area', 'volume', 'probability',
)

# Substrings that mark an Ultrachat opening prompt as math-related.
_ULTRACHAT_KEYWORDS = (
    'math', 'calculate', 'solve', 'problem', 'equation', 'derivative', 'integral',
)

# Problems served when no dataset sample could be loaded.
_FALLBACK_PROBLEMS = (
    "What is the derivative of f(x) = 3x² + 2x - 1?",
    "A triangle has sides of length 5, 12, and 13. What is its area?",
    "If log₂(x) + log₂(x+6) = 4, find the value of x.",
    "Find the limit: lim(x->0) (sin(x)/x)",
    "Solve the system: x + 2y = 7, 3x - y = 4",
    "Calculate the integral of sin(x) from 0 to pi.",
    "What is the probability of rolling a 6 on a die 3 times in a row?",
)


def _mentions_any(text, keywords):
    """Return True when *text* contains at least one of *keywords*."""
    return any(word in text for word in keywords)


def _gsm8k_questions(limit=50):
    """Yield up to *limit* questions from the GSM8K train split."""
    gsm8k = load_dataset("openai/gsm8k", "main", streaming=True)
    taken = 0
    for item in gsm8k["train"]:
        yield item["question"]
        taken += 1
        if taken >= limit:
            break


def _fineweb_snippets(limit=20):
    """Yield up to *limit* keyword-matched Fineweb-edu snippets (first 150 chars)."""
    stream = load_dataset(
        "HuggingFaceFW/fineweb-edu", name="sample-10BT", split="train", streaming=True
    )
    taken = 0
    for item in stream:
        text = item['text']
        if not _mentions_any(text.lower(), _FINEWEB_KEYWORDS):
            continue
        snippet = text[:150].strip()
        if len(snippet) <= 20:
            continue
        yield snippet + " (Solve this math problem.)"
        taken += 1
        if taken >= limit:
            break


def _ultrachat_prompts(limit=20):
    """Yield up to *limit* keyword-matched opening prompts from Ultrachat."""
    # The split must be named: without it load_dataset returns a mapping of
    # splits, and iterating that yields split names instead of rows.
    stream = load_dataset("HuggingFaceH4/ultrachat_200k", split="train_sft", streaming=True)
    taken = 0
    for item in stream:
        messages = item['messages']
        if not messages:
            continue
        user_msg = messages[0]['content']
        if len(user_msg) <= 10:
            continue
        if not _mentions_any(user_msg.lower(), _ULTRACHAT_KEYWORDS):
            continue
        yield user_msg
        taken += 1
        if taken >= limit:
            break


def load_sample_problems():
    """Load and cache the sample problems offered by the app.

    Streams up to 50 GSM8K questions, 20 keyword-matched Fineweb-edu
    snippets and 20 keyword-matched Ultrachat prompts. Each source is read
    independently, so a failure in one keeps whatever the others (and the
    failing source itself, up to that point) produced. If nothing at all
    could be loaded, a small built-in list is used instead.

    Returns:
        list: the problem strings. The result is stored in the module-level
        ``math_samples`` and the same list is returned on later calls.
    """
    global math_samples
    if math_samples is not None:
        return math_samples

    sources = (
        ("GSM8K", _gsm8k_questions),
        ("Fineweb-edu", _fineweb_snippets),
        ("Ultrachat", _ultrachat_prompts),
    )
    samples = []
    counts = {}
    for label, stream_problems in sources:
        print(f"Loading {label}...")
        counts[label] = 0
        try:
            for problem in stream_problems():
                samples.append(problem)
                counts[label] += 1
        except Exception as e:
            # Best effort: hub/network/schema errors must not break the app.
            print(f"⚠️ Dataset error ({label}): {e}")

    if samples:
        summary = ", ".join(f"{label} ({count})" for label, count in counts.items())
        print(f"✅ Loaded {len(samples)} samples: {summary}")
    else:
        print("⚠️ No dataset samples available, using fallback")
        samples = list(_FALLBACK_PROBLEMS)

    math_samples = samples
    return math_samples
|
|
def create_math_system_message():
    """Return the system prompt that sets up the math-tutor persona.

    The prompt lists the tutor's expertise, the problem domains, a five-step
    teaching style and the LaTeX conventions the model is asked to follow.
    It is a raw string so the LaTeX backslashes reach the model unchanged.
    """
    return r"""You are Mathetics AI, an advanced mathematics tutor and problem solver.

🧮 **Your Expertise:**
- Step-by-step problem solving with clear explanations
- Multiple solution approaches when applicable
- Proper mathematical notation and terminology using LaTeX
- Verification of answers through different methods

**Problem Domains:**
- Arithmetic, Algebra, and Number Theory
- Geometry, Trigonometry, and Coordinate Geometry
- Calculus (Limits, Derivatives, Integrals)
- Statistics, Probability, and Data Analysis
- Competition Mathematics (AMC, AIME level)

💡 **Teaching Style:**
1. **Understand the Problem** - Identify what's being asked
2. **Plan the Solution** - Choose the appropriate method
3. **Execute Step-by-Step** - Show all work clearly with LaTeX formatting
4. **Verify the Answer** - Check if the result makes sense
5. **Alternative Methods** - Mention other possible approaches

**LaTeX Guidelines:**
- Use $...$ for inline math: $x^2 + y^2 = z^2$
- Use $$...$$ for display math
- Box final answers: \boxed{answer}
- Fractions: \frac{numerator}{denominator}
- Limits: \lim_{x \to 0}
- Derivatives: \frac{d}{dx} or f'(x)

Always be precise, educational, and encourage mathematical thinking."""
|
|
_BOXED_OPEN = '\\boxed{'


def _matching_brace(text, open_idx):
    """Return the index of the "}" that closes the "{" at *open_idx*, or -1."""
    depth = 0
    i = open_idx
    while i < len(text):
        ch = text[i]
        if ch == '\\':
            i += 2  # skip the escaped character, e.g. "\{" or "\}"
            continue
        if ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                return i
        i += 1
    return -1


def _wrap_bare_boxed(text):
    """Wrap each complete \\boxed{...} found outside math mode in $...$."""
    pieces = []
    mode = None  # None, '$' (inline math) or '$$' (display math)
    i, n = 0, len(text)
    while i < n:
        ch = text[i]
        if ch == '\\':
            if mode is None and text.startswith(_BOXED_OPEN, i):
                open_idx = i + len(_BOXED_OPEN) - 1
                close_idx = _matching_brace(text, open_idx)
                # Skip empty or unterminated boxes (the text may still be
                # streaming) and boxes that already carry their own "$".
                if close_idx > open_idx + 1 and '$' not in text[open_idx:close_idx]:
                    pieces.append('$' + text[i:close_idx + 1] + '$')
                    i = close_idx + 1
                    continue
            # Copy the backslash together with the next character so an
            # escaped "\$" is never mistaken for a math delimiter.
            pieces.append(text[i:i + 2])
            i += 2
        elif ch == '$':
            if mode == '$':
                width, mode = 1, None
            elif text.startswith('$$', i):
                width = 2
                mode = None if mode == '$$' else '$$'
            elif mode is None:
                width, mode = 1, '$'
            else:
                width = 1  # lone "$" inside display math: keep as literal
            pieces.append(text[i:i + width])
            i += width
        else:
            pieces.append(ch)
            i += 1
    return ''.join(pieces)


def render_latex(text):
    """Normalise LaTeX delimiters in model output to dollar-sign math.

    - ``\\[ ... \\]`` becomes ``$$ ... $$`` and ``\\( ... \\)`` becomes ``$ ... $``.
    - Every complete ``\\boxed{...}`` that is not already inside ``$...$`` or
      ``$$...$$`` is wrapped in ``$...$``. Braces are matched, so nested
      arguments such as ``\\boxed{\\frac{1}{2}}`` stay intact.

    Args:
        text: the (possibly partial) response text.

    Returns:
        The normalised text. Empty or non-string input is returned unchanged.
    """
    if not text or not isinstance(text, str):
        return text

    text = re.sub(r'\\\[(.*?)\\\]', r'$$\1$$', text, flags=re.DOTALL)
    text = re.sub(r'\\\((.*?)\\\)', r'$\1$', text, flags=re.DOTALL)

    if _BOXED_OPEN in text:
        text = _wrap_bare_boxed(text)
    return text
|
|
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Stream the model's answer to *message*, yielding the text so far.

    Args:
        message: the new user prompt.
        history: earlier turns as dicts with "role" and "content"; only
            "user" and "assistant" entries are forwarded to the model.
        system_message: system prompt placed first in the conversation.
        max_tokens: forwarded to ``chat_completion``.
        temperature: forwarded to ``chat_completion``.
        top_p: forwarded to ``chat_completion``.

    Yields:
        str: the accumulated response, passed through ``render_latex``,
        after each chunk that carries text. If anything fails, a final
        error notice is yielded; text received before the failure is kept
        above the notice.
    """
    messages = [{"role": "system", "content": system_message}]
    messages.extend(
        {"role": turn["role"], "content": turn["content"]}
        for turn in (history or [])
        if turn["role"] in ("user", "assistant")
    )
    messages.append({"role": "user", "content": message})

    response = ""
    try:
        client = InferenceClient(model="Qwen/Qwen2.5-Math-7B-Instruct")
        stream = client.chat_completion(
            messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=True,
        )
        for chunk in stream:
            # Some chunks carry no choices; indexing them would raise and
            # replace the answer with an error.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta.content
            if delta:
                response += delta
                yield render_latex(response)

    except Exception as e:
        notice = f"❌ Error: {str(e)[:100]}..."
        if response:
            yield f"{render_latex(response)}\n\n{notice}"
        else:
            yield notice
|
|
def get_random_sample():
    """Return one randomly chosen sample problem.

    ``load_sample_problems`` does the loading and caching itself, so this
    function only picks from the list it returns.
    """
    return random.choice(load_sample_problems())
|
|
def insert_sample_to_chat(difficulty=None):
    """Return a random sample problem to place in the chat input box.

    Args:
        difficulty: currently ignored. It is optional so the function can be
            used as a click handler that is wired without inputs.
    """
    return get_random_sample()
|
|
def show_help():
    """Return the Markdown text with tips for asking math questions."""
    return """**🧮 Math Help Tips:**

1. Be Specific: "Find the derivative of x² + 3x" instead of "help with calculus"
2. Request Steps: "Show me step-by-step how to solve..."
3. Ask for Verification: "Check if my answer x=5 is correct"
4. Alternative Methods: "What's another way to solve this integral?"
5. Use Clear Notation: "lim(x->0)" for limits

Pro Tip: Crank tokens to 1500+ for competition problems!"""
|
|
| |
# Build the chat UI: conversation panel, input box, action buttons and examples.
with gr.Blocks(title="🧮 Mathetics AI") as demo:
    gr.Markdown("# 🧮 **Mathetics AI** - Math Tutor\nPowered by Qwen 2.5-Math")

    chatbot = gr.Chatbot(
        height=500,
        label="Conversation",
        type='messages',
        # Render inline $...$ as well as display $$...$$; "$$" is listed
        # first so it is matched before the single-dollar form.
        latex_delimiters=[
            {"left": "$$", "right": "$$", "display": True},
            {"left": "$", "right": "$", "display": False},
        ],
    )
    help_text = gr.Markdown(visible=False)

    msg = gr.Textbox(placeholder="Ask a math problem...", show_label=False)

    with gr.Row():
        submit = gr.Button("Solve", variant="primary")
        clear = gr.Button("Clear", variant="secondary")
        sample = gr.Button("Random Problem", variant="secondary")
        help_btn = gr.Button("Help", variant="secondary")

    gr.Examples(
        examples=[
            ["derivative of x^2 sin(x)"],
            ["area of triangle 5-12-13"],
            ["∫x^2 dx"]
        ],
        inputs=msg
    )

    def chat_response(message, history):
        """Stream the assistant's reply into the chat and clear the textbox.

        Yields (history, textbox_value) pairs. A blank message leaves the
        chat and the textbox as they are.
        """
        if not message or not message.strip():
            yield history, message
            return

        # Keep the earlier turns separate: respond() appends the new user
        # message itself, so passing it inside the history would send it twice.
        prior_turns = list(history or [])
        history = prior_turns + [
            {"role": "user", "content": message},
            {"role": "assistant", "content": ""},
        ]
        # Show the user's message right away, before the first chunk arrives.
        yield history, ""

        for partial_response in respond(message, prior_turns, create_math_system_message(), 1024, 0.3, 0.85):
            history[-1]["content"] = partial_response
            yield history, ""

    def clear_chat():
        """Clear the chat history and textbox."""
        return [], ""

    msg.submit(chat_response, [msg, chatbot], [chatbot, msg])
    submit.click(chat_response, [msg, chatbot], [chatbot, msg])
    clear.click(clear_chat, outputs=[chatbot, msg])
    # The button supplies no inputs, so pass the unused argument explicitly.
    sample.click(lambda: insert_sample_to_chat(None), outputs=msg)
    # Fill the help panel and leave it visible.
    help_btn.click(lambda: gr.update(value=show_help(), visible=True), outputs=help_text)


demo.launch()
|
|