Spaces:

TheLoopStudio
/

Mathematics-AI

Paused

App Files Files Community

Mathematics-AI / app.py

Khoi1234210

Update app.py

225572b verified 7 months ago

raw

history blame

9.11 kB

	import gradio as gr
	from huggingface_hub import InferenceClient
	from datasets import load_dataset
	import random
	import re

	# Lazily populated cache of sample problems; stays None until the first load.
	math_samples = None

	# Substrings that mark a Fineweb-edu document as math-related.
	_FINEWEB_KEYWORDS = (
	    'math', 'calculate', 'solve', 'derivative', 'integral',
	    'triangle', 'equation', 'area', 'volume', 'probability',
	)

	# Substrings that mark an Ultrachat opening message as math-related.
	_ULTRACHAT_KEYWORDS = (
	    'math', 'calculate', 'solve', 'problem', 'equation',
	    'derivative', 'integral',
	)

	# Problems served when no dataset yields a single sample.
	_FALLBACK_PROBLEMS = (
	    "What is the derivative of f(x) = 3x² + 2x - 1?",
	    "A triangle has sides of length 5, 12, and 13. What is its area?",
	    "If log₂(x) + log₂(x+6) = 4, find the value of x.",
	    "Find the limit: lim(x->0) (sin(x)/x)",
	    "Solve the system: x + 2y = 7, 3x - y = 4",
	    "Calculate the integral of sin(x) from 0 to pi.",
	    "What is the probability of rolling a 6 on a die 3 times in a row?",
	)


	def _gsm8k_question(row):
	    """Return the question text of a GSM8K record."""
	    return row["question"]


	def _fineweb_question(row):
	    """Return a truncated, math-looking Fineweb-edu snippet, or None to skip the record."""
	    text = row['text']
	    lowered = text.lower()
	    if not any(word in lowered for word in _FINEWEB_KEYWORDS):
	        return None
	    snippet = text[:150].strip()
	    if len(snippet) <= 20:  # too short to be a substantial prompt
	        return None
	    return snippet + " (Solve this math problem.)"


	def _ultrachat_question(row):
	    """Return the first message of a math-looking Ultrachat conversation, or None to skip it."""
	    turns = row['messages']
	    if not turns:
	        return None
	    opening = turns[0]['content']
	    if len(opening) <= 10:  # too short to be a valid prompt
	        return None
	    if not any(word in opening.lower() for word in _ULTRACHAT_KEYWORDS):
	        return None
	    return opening


	def _collect_into(bucket, stream, extract, limit):
	    """Append up to `limit` non-None `extract(row)` results from `stream` to `bucket`."""
	    for row in stream:
	        candidate = extract(row)
	        if candidate is None:
	            continue
	        bucket.append(candidate)
	        if len(bucket) >= limit:
	            break


	def load_sample_problems():
	    """Load and cache sample problems from GSM8K, Fineweb-edu and Ultrachat.

	    Each dataset is streamed and read independently, so a failure in one
	    source no longer throws away what the other sources already produced.
	    The built-in fallback list is used only when no source yields a sample.

	    Returns:
	        The list of problem strings, also stored in the module-level
	        `math_samples` so later calls return the cached list.
	    """
	    global math_samples
	    if math_samples is not None:
	        return math_samples

	    # (label, stream opener, per-record extractor, max samples to take).
	    # Openers are lambdas so a load failure is raised inside the per-source try.
	    sources = (
	        (
	            "GSM8K",
	            lambda: load_dataset("openai/gsm8k", "main", split="train", streaming=True),
	            _gsm8k_question,
	            50,
	        ),
	        (
	            "Fineweb-edu",
	            lambda: load_dataset(
	                "HuggingFaceFW/fineweb-edu", name="sample-10BT", split="train", streaming=True
	            ),
	            _fineweb_question,
	            20,
	        ),
	        (
	            "Ultrachat",
	            # An explicit split is required: without it load_dataset returns a
	            # dict of splits and iterating it yields split names, not records.
	            lambda: load_dataset(
	                "HuggingFaceH4/ultrachat_200k", split="train_sft", streaming=True
	            ),
	            _ultrachat_question,
	            20,
	        ),
	    )

	    samples = []
	    counts = {}
	    for label, open_stream, extract, limit in sources:
	        print(f"🔄 Loading {label}...")
	        picked = []
	        try:
	            _collect_into(picked, open_stream(), extract, limit)
	        except Exception as e:
	            # Best-effort: keep whatever was read before the failure and move on.
	            print(f"⚠️ Dataset error: {e}, skipping {label}")
	        samples.extend(picked)
	        counts[label] = len(picked)

	    if samples:
	        summary = ", ".join(f"{label} ({n})" for label, n in counts.items())
	        print(f"✅ Loaded {len(samples)} samples: {summary}")
	        math_samples = samples
	    else:
	        print("⚠️ No dataset samples available, using fallback")
	        math_samples = list(_FALLBACK_PROBLEMS)
	    return math_samples

	def create_math_system_message():
	    """Return the fixed system prompt for the math tutor persona.

	    The prompt is a raw string so the LaTeX backslashes in the guidelines
	    section reach the model unchanged.
	    """
	    system_prompt = r"""You are Mathetics AI, an advanced mathematics tutor and problem solver.

	🧮 Your Expertise:
	- Step-by-step problem solving with clear explanations
	- Multiple solution approaches when applicable
	- Proper mathematical notation and terminology using LaTeX
	- Verification of answers through different methods

	📐 Problem Domains:
	- Arithmetic, Algebra, and Number Theory
	- Geometry, Trigonometry, and Coordinate Geometry
	- Calculus (Limits, Derivatives, Integrals)
	- Statistics, Probability, and Data Analysis
	- Competition Mathematics (AMC, AIME level)

	💡 Teaching Style:
	1. Understand the Problem - Identify what's being asked
	2. Plan the Solution - Choose the appropriate method
	3. Execute Step-by-Step - Show all work clearly with LaTeX formatting
	4. Verify the Answer - Check if the result makes sense
	5. Alternative Methods - Mention other possible approaches

	LaTeX Guidelines:
	- Use $...$ for inline math: $x^2 + y^2 = z^2$
	- Use $$...$$ for display math
	- Box final answers: \boxed{answer}
	- Fractions: \frac{numerator}{denominator}
	- Limits: \lim_{x \to 0}
	- Derivatives: \frac{d}{dx} or f'(x)

	Always be precise, educational, and encourage mathematical thinking."""
	    return system_prompt

	# Existing math spans ($$...$$ or single-line $...$); commands inside are left alone.
	_MATH_SPAN_RE = re.compile(r'(\$\$.+?\$\$|\$[^$\n]+\$)', re.DOTALL)

	# \[ ... \] and \( ... \) delimiters; the lookbehind skips the LaTeX line break "\\[".
	_DISPLAY_BRACKET_RE = re.compile(r'(?<!\\)\\\[(.+?)\\\]', re.DOTALL)
	_INLINE_PAREN_RE = re.compile(r'(?<!\\)\\\((.+?)\\\)', re.DOTALL)

	# Commands that get wrapped in math delimiters when found outside any math span.
	_BARE_COMMAND_RE = re.compile(r'\\(boxed|frac)\{')


	def _balanced_end(text, open_idx):
	    """Return the index just past the '}' closing the '{' at open_idx, or -1 if unbalanced."""
	    depth = 0
	    for pos in range(open_idx, len(text)):
	        ch = text[pos]
	        if ch == '{':
	            depth += 1
	        elif ch == '}':
	            depth -= 1
	            if depth == 0:
	                return pos + 1
	    return -1


	def _wrap_bare_commands(prose):
	    """Wrap \\boxed{...} in $$...$$ and \\frac{..}{..} in $...$ within non-math text."""
	    out = []
	    cursor = 0
	    while True:
	        hit = _BARE_COMMAND_RE.search(prose, cursor)
	        if hit is None:
	            break
	        end = _balanced_end(prose, hit.end() - 1)
	        if end != -1 and hit.group(1) == "frac":
	            # \frac needs a second braced argument right after the first.
	            if end < len(prose) and prose[end] == '{':
	                end = _balanced_end(prose, end)
	            else:
	                end = -1
	        if end == -1:
	            # Malformed command: copy it through untouched and keep scanning.
	            out.append(prose[cursor:hit.end()])
	            cursor = hit.end()
	            continue
	        fence = "$$" if hit.group(1) == "boxed" else "$"
	        out.append(prose[cursor:hit.start()])
	        out.append(fence + prose[hit.start():end] + fence)
	        cursor = end
	    out.append(prose[cursor:])
	    return "".join(out)


	def render_latex(text):
	    """Normalize LaTeX delimiters in model output to $ / $$ form.

	    - ``\\[ ... \\]`` becomes ``$$...$$`` and ``\\( ... \\)`` becomes ``$...$``.
	    - ``\\boxed{...}`` and ``\\frac{..}{..}`` found outside any math span are
	      wrapped in ``$$`` / ``$`` respectively; nested braces are handled.
	    - Text already inside ``$...$`` or ``$$...$$`` is left untouched.

	    Replacements are built with plain string concatenation rather than
	    ``re.sub`` templates, because a template turns ``\\b`` into a backspace
	    and ``\\f`` into a form feed, which corrupts ``\\boxed`` and ``\\frac``.

	    Args:
	        text: The raw response text; falsy or very short values (fewer than
	            5 characters) are returned unchanged.

	    Returns:
	        The normalized text, or the input as it stood when an error occurred.
	    """
	    if not text or len(text) < 5:
	        return text

	    try:
	        text = _DISPLAY_BRACKET_RE.sub(lambda m: "$$" + m.group(1).strip() + "$$", text)
	        text = _INLINE_PAREN_RE.sub(lambda m: "$" + m.group(1).strip() + "$", text)

	        # split() with a capture group alternates prose (even) and math (odd) pieces.
	        pieces = _MATH_SPAN_RE.split(text)
	        text = "".join(
	            piece if index % 2 else _wrap_bare_commands(piece)
	            for index, piece in enumerate(pieces)
	        )
	    except Exception as e:
	        # Formatting is best-effort: never let it break the chat response.
	        print(f"⚠️ LaTeX formatting error: {e}")

	    return text

	def respond(message, history, system_message, max_tokens, temperature, top_p):
	    """Send the conversation to the Qwen math model and return its reply.

	    The request is a single non-streaming chat completion. The reply is
	    passed through `render_latex`; if the request raises, a short error
	    string (first 100 characters of the exception) is returned instead.

	    Args:
	        message: The new user message.
	        history: Prior turns as dicts with "role" and "content" keys; only
	            "user" and "assistant" turns are forwarded.
	        system_message: System prompt placed first in the request.
	        max_tokens: Passed through to `chat_completion`.
	        temperature: Passed through to `chat_completion`.
	        top_p: Passed through to `chat_completion`.
	    """
	    client = InferenceClient(model="Qwen/Qwen2.5-Math-7B-Instruct")

	    # Re-pack each forwarded turn so only role and content are sent.
	    prior_turns = [
	        {"role": turn["role"], "content": turn["content"]}
	        for turn in history
	        if turn["role"] in ("user", "assistant")
	    ]
	    messages = [
	        {"role": "system", "content": system_message},
	        *prior_turns,
	        {"role": "user", "content": message},
	    ]

	    try:
	        completion = client.chat_completion(
	            messages,
	            max_tokens=max_tokens,
	            temperature=temperature,
	            top_p=top_p,
	        )
	        return render_latex(completion.choices[0].message.content)
	    except Exception as exc:
	        return f"❌ Error: {str(exc)[:100]}... Try a simpler problem."

	def get_random_sample():
	    """Return one randomly chosen sample problem.

	    Populates the module-level `math_samples` via `load_sample_problems`
	    on first use, then picks from it.
	    """
	    global math_samples
	    if math_samples is not None:
	        return random.choice(math_samples)
	    math_samples = load_sample_problems()
	    return random.choice(math_samples)

	def insert_sample_to_chat(difficulty=None):
	    """Return a random sample problem for the chat input box.

	    Args:
	        difficulty: Currently ignored. It defaults to None so the function
	            can be used as an event handler that is wired without inputs;
	            a required parameter there fails with a missing-argument error.

	    Returns:
	        A problem string from `get_random_sample`.
	    """
	    return get_random_sample()

	def show_help():
	    """Return the static usage tips shown by the Help button."""
	    tips = """🧮 Math Help Tips:

	1. Be Specific: "Find the derivative of x² + 3x" instead of "help with calculus"
	2. Request Steps: "Show me step-by-step how to solve..."
	3. Ask for Verification: "Check if my answer x=5 is correct"
	4. Alternative Methods: "What's another way to solve this integral?"
	5. Use Clear Notation: "lim(x->0)" for limits

	Pro Tip: Crank tokens to 1500+ for competition problems!"""
	    return tips

	# Simple Chatbot interface: one conversation pane, a text input and four buttons.
	with gr.Blocks(title="🧮 Mathetics AI") as demo:
	    gr.Markdown("# 🧮 Mathetics AI - Math Tutor\nPowered by Qwen 2.5-Math")

	    chatbot = gr.Chatbot(
	        height=500,
	        label="Conversation",
	        type='messages',
	        # The default delimiters render only $$...$$; the system prompt also
	        # asks the model for inline $...$, so enable both.
	        latex_delimiters=[
	            {"left": "$$", "right": "$$", "display": True},
	            {"left": "$", "right": "$", "display": False},
	        ],
	    )
	    # Hidden until the Help button is pressed.
	    help_text = gr.Markdown(visible=False)

	    msg = gr.Textbox(placeholder="Ask a math problem...", show_label=False)

	    with gr.Row():
	        submit = gr.Button("Solve", variant="primary")
	        clear = gr.Button("Clear", variant="secondary")
	        sample = gr.Button("Random Problem", variant="secondary")
	        help_btn = gr.Button("Help", variant="secondary")

	    gr.Examples(
	        examples=[
	            ["derivative of x^2 sin(x)"],
	            ["area of triangle 5-12-13"],
	            ["∫x^2 dx"]
	        ],
	        inputs=msg
	    )

	    def chat_response(message, history):
	        """Answer `message` and return (updated history, cleared textbox value).

	        History uses the dict format required by type='messages'. Blank
	        submissions are ignored instead of being sent to the model.
	        """
	        history = history or []
	        if not message or not message.strip():
	            return history, ""
	        bot_response = respond(message, history, create_math_system_message(), 1024, 0.3, 0.85)
	        # Append as dicts, not tuples
	        history.append({"role": "user", "content": message})
	        history.append({"role": "assistant", "content": bot_response})
	        return history, ""

	    def clear_chat():
	        """Clear the chat history and textbox."""
	        return [], ""

	    msg.submit(chat_response, [msg, chatbot], [chatbot, msg])
	    submit.click(chat_response, [msg, chatbot], [chatbot, msg])
	    clear.click(clear_chat, outputs=[chatbot, msg])
	    # No input component is wired here, so pass the difficulty argument explicitly.
	    sample.click(lambda: insert_sample_to_chat(None), outputs=msg)
	    # One update sets the text and reveals it. There is no follow-up step that
	    # hides it again, so the help stays visible after the click.
	    help_btn.click(lambda: gr.update(value=show_help(), visible=True), outputs=help_text)

	demo.launch()