# DreamyDetective's picture
# feat: added application files
# 69ac033 verified
"""
StepWise Math - Gradio MCP Framework Version
Transform Static Math Problems into Living, Interactive Step-by-Step Visual Proofs
Powered by Google Gemini 2.5 Flash & Gemini 3.0 Pro with Extended Thinking
"""
import gradio as gr
import os
import json
import time
import base64
import re
from datetime import datetime
from pathlib import Path
from typing import Optional, Tuple, List, Dict, Any
from io import BytesIO
from google import genai
from google.genai import types
import logging
# Root logging is set to INFO; this module logs through its own named logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# ==================== Configuration ====================
class Config:
    """Application-wide settings: default API key and on-disk storage folders."""

    # Key taken from the environment; empty string when the variable is unset.
    DEFAULT_API_KEY = os.environ.get("GEMINI_API_KEY", "")

    # Folders are relative to the current working directory.
    LIBRARY_PATH = Path("saved_proofs")
    EXAMPLES_PATH = Path("examples")

    # Make sure both folders exist as soon as the class body is executed.
    LIBRARY_PATH.mkdir(exist_ok=True)
    EXAMPLES_PATH.mkdir(exist_ok=True)
# ==================== Data Models ====================
class MathSpec:
    """Structured mathematical concept specification.

    Wraps the camelCase JSON object produced by stage 1 and exposes it through
    snake_case attributes. Missing *or null* fields fall back to an empty value
    of the expected type so later code can call ``len(spec.steps)`` and
    ``spec.visual_spec.get(...)`` safely.
    """

    def __init__(self, data: dict):
        """Populate the spec from ``data`` (a dict with camelCase keys)."""
        # ``or`` (instead of a .get default) also covers explicit JSON nulls.
        self.concept_title = data.get("conceptTitle") or ""
        self.educational_goal = data.get("educationalGoal") or ""
        self.explanation = data.get("explanation") or ""
        self.steps = data.get("steps") or []
        self.visual_spec = data.get("visualSpec") or {}

    def to_dict(self):
        """Return the spec as a dict using the original camelCase keys."""
        return {
            "conceptTitle": self.concept_title,
            "educationalGoal": self.educational_goal,
            "explanation": self.explanation,
            "steps": self.steps,
            "visualSpec": self.visual_spec
        }
# ==================== AI Pipeline ====================
class GeminiPipeline:
    """Two-stage AI pipeline for concept decomposition and code generation.

    Stage 1 asks ``gemini-2.5-flash`` for a JSON teaching specification,
    stage 2 asks ``gemini-3-pro-preview`` for a single-file HTML application.
    Human-readable progress entries are collected in ``process_logs``.
    """

    def __init__(self, api_key: str):
        """Create the Gemini client for ``api_key`` and reset per-run state."""
        self.api_key = api_key
        self.client = genai.Client(api_key=api_key)
        self.current_thought = ""
        self.process_logs = []

    def add_log(self, message: str, log_type: str = "info"):
        """Add a log entry with timestamp and return it as ``[HH:MM:SS] message``."""
        timestamp = datetime.now().strftime("%H:%M:%S")
        self.process_logs.append({
            "timestamp": timestamp,
            "message": message,
            "type": log_type
        })
        return f"[{timestamp}] {message}"

    async def process_stream(self, stream):
        """Process streaming response and extract thoughts.

        Text of parts flagged as ``thought`` is accumulated in
        ``self.current_thought``; text of all other parts is concatenated and
        returned.
        """
        full_text = ""
        self.current_thought = ""
        for chunk in stream:
            if not chunk.candidates or not chunk.candidates[0].content:
                continue
            # ``parts`` and ``text`` can be present but None; treat both as empty.
            for part in chunk.candidates[0].content.parts or []:
                text = getattr(part, 'text', None) or ''
                if getattr(part, 'thought', False):
                    self.current_thought += text
                else:
                    full_text += text
        return full_text

    @staticmethod
    def _strip_code_fences(text: str) -> str:
        """Remove one wrapping markdown fence (```lang ... ```) around ``text``.

        Only a fence at the very start and/or very end is removed, so backticks
        that are part of the payload itself survive.
        """
        cleaned = text.strip()
        cleaned = re.sub(r"^```[\w-]*[ \t]*\r?\n?", "", cleaned)
        cleaned = re.sub(r"\r?\n?```$", "", cleaned)
        return cleaned.strip()

    @staticmethod
    def _format_steps(steps) -> str:
        """Render the steps as numbered prompt lines, tolerating missing keys."""
        lines = []
        for index, step in enumerate(steps or [], 1):
            if not isinstance(step, dict):
                step = {"instruction": str(step)}
            lines.append(
                f"{index}. [{step.get('stepTitle', '')}] {step.get('instruction', '')} "
                f"(Focus: {step.get('visualFocus', '')})"
            )
        return "\n".join(lines)

    @staticmethod
    def _join_items(items) -> str:
        """Join a list of (possibly non-string) items with ', '."""
        if isinstance(items, str):
            return items
        return ', '.join(str(item) for item in items or [])

    @staticmethod
    def _image_to_jpeg_base64(image: Any) -> str:
        """Encode a PIL image as a base64 JPEG string.

        JPEG cannot store alpha or palette modes, so anything other than
        RGB/L is converted to RGB first.
        """
        if getattr(image, "mode", "RGB") not in ("RGB", "L"):
            image = image.convert("RGB")
        buffered = BytesIO()
        image.save(buffered, format="JPEG")
        return base64.b64encode(buffered.getvalue()).decode()

    def clean_json_output(self, text: str) -> str:
        """Extract the JSON object from a model reply and fix common JSON issues.

        The span from the first '{' to the last '}' is kept, which drops any
        surrounding markdown fence or chatter without touching backticks inside
        the JSON. If that does not parse, trailing commas before ``]``/``}``
        are removed as a best-effort repair. The (possibly still invalid) text
        is returned either way; the caller decides how to handle a parse error.
        """
        start = text.find('{')
        end = text.rfind('}')
        if start != -1 and end > start:
            cleaned = text[start:end + 1]
        else:
            # No object found: fall back to plain fence removal.
            cleaned = text.replace('```json', '').replace('```', '')
        cleaned = cleaned.strip()
        try:
            json.loads(cleaned)
            return cleaned
        except json.JSONDecodeError as e:
            logger.warning(f"Initial JSON parse failed: {e}. Attempting to fix...")
        # Try to fix trailing commas before ] or }
        cleaned = re.sub(r',(\s*[}\]])', r'\1', cleaned)
        try:
            json.loads(cleaned)
            logger.info("Fixed JSON with comma cleanup")
            return cleaned
        except json.JSONDecodeError:
            logger.error(f"Could not auto-fix JSON. Returning original: {cleaned[:500]}...")
            return cleaned

    def stage1_analyze_concept(self, input_text: str = "", input_url: str = "",
                               input_image: Optional[Any] = None,
                               input_mode: str = "text") -> Tuple["MathSpec", List[dict]]:
        """
        Stage 1: Concept Decomposition (Gemini 2.5 Flash)
        Analyzes the math problem and creates a teaching plan

        Args:
            input_text: Problem description, used when ``input_mode`` is "text".
            input_url: Page to analyse, used when ``input_mode`` is "url".
            input_image: Image object exposing ``save`` (PIL-style), used when
                ``input_mode`` is "image".
            input_mode: One of "text", "url" or "image"; anything else, or a
                missing url/image, is handled as text.

        Returns:
            ``(spec, process_logs)``.

        Raises:
            ValueError: The model returned no text or unparseable JSON.
            Exception: Whatever the API client raises is logged and re-raised.
        """
        logger.info("="*60)
        logger.info("STAGE 1: CONCEPT ANALYSIS - Starting Gemini 2.5 Flash call")
        logger.info(f"Input Mode: {input_mode}")
        if input_mode == "text":
            logger.info(f"Text Input Length: {len(input_text)} characters")
        elif input_mode == "url":
            logger.info(f"URL Input: {input_url}")
        elif input_mode == "image":
            logger.info(f"Image Input: {type(input_image)}")
        self.add_log("Stage 1: Analyzing concept with Gemini 2.5 Flash...", "thinking")
        system_instruction = """You are a world-class mathematics educator and visual designer.
Your goal is to translate user inputs into a "Step-by-Step Interactive Visual Proof".
Do not just solve the problem. Design a web application that guides the student through the concept incrementally.
CRITICAL DESIGN CONSTRAINT: Ensure the visual specification prioritizes clarity. Avoid clutter. Request layouts where controls, text, and diagrams are separated to prevent overlapping.
Return a JSON object with:
- conceptTitle: Short name (e.g., "Pythagorean Theorem").
- educationalGoal: What the student learns.
- explanation: Friendly markdown explanation.
- steps: An array of 3-6 logical steps.
- stepTitle: Title of this phase.
- instruction: What the user should do or observe (e.g., "Drag vertex A", "Click Next to see the area").
- visualFocus: What part of the visual changes or is highlighted.
- visualSpec: Technical details for the engineer.
- elements: List of visual objects.
- interactions: User actions.
- mathLogic: Formulas needed.
"""
        parts = []
        config = {
            "thinking_config": types.ThinkingConfig(
                include_thoughts=True,
                thinking_budget=2048  # Limited budget to prevent response truncation
            )
        }
        # Build request based on input mode
        if input_mode == "url" and input_url:
            self.add_log(f"Processing URL: {input_url}", "info")
            prompt = f"""Analyze the math concept at this URL: {input_url}.
Design a step-by-step visual proof and return the specification in strict JSON format.
The JSON must match this structure exactly:
{{
"conceptTitle": "string",
"educationalGoal": "string",
"explanation": "string",
"steps": [ {{ "stepTitle": "string", "instruction": "string", "visualFocus": "string" }} ],
"visualSpec": {{ "elements": ["string"], "interactions": ["string"], "mathLogic": "string" }}
}}
IMPORTANT: Return ONLY the raw JSON string. Do not include markdown formatting, code blocks, or conversational text. Start the response with '{{'."""
            parts.append({"text": prompt})
            # Use both google_search and url_context for comprehensive URL processing
            config["tools"] = [{"google_search": {}}, {"url_context": {}}]
            # NOTE: Do NOT use response_mime_type or response_schema with URL grounding tools
            # The model needs prompt-based guidance for JSON format when using these tools
        elif input_mode == "image" and input_image is not None:
            self.add_log("Processing uploaded image...", "info")
            img_base64 = self._image_to_jpeg_base64(input_image)
            prompt = """Analyze the math problem in this image and design a step-by-step visual proof.
Return a complete, valid JSON object following the exact structure specified in the system instruction.
Ensure all JSON fields are properly closed and the response is a valid, parseable JSON."""
            parts.append({"inline_data": {"mime_type": "image/jpeg", "data": img_base64}})
            parts.append({"text": prompt})
            config["response_mime_type"] = "application/json"
            config["response_schema"] = self._get_math_spec_schema()
        else:  # text mode
            self.add_log("Processing text input...", "info")
            prompt = f"""Analyze this math problem/concept and design a step-by-step visual proof: {input_text}
Return a complete, valid JSON object following the exact structure specified in the system instruction.
Ensure all JSON fields are properly closed and the response is a valid, parseable JSON."""
            parts.append({"text": prompt})
            config["response_mime_type"] = "application/json"
            config["response_schema"] = self._get_math_spec_schema()
        # Generate response
        logger.info("Sending API request to Gemini 2.5 Flash...")
        logger.debug(f"Config: {config}")
        try:
            response = self.client.models.generate_content(
                model="gemini-2.5-flash",
                contents={"parts": parts},
                config=types.GenerateContentConfig(
                    system_instruction=system_instruction,
                    **config
                )
            )
            # ``text`` is None when the reply carries no text part.
            raw_text = response.text or ""
            logger.info("✓ API response received successfully")
            logger.debug(f"Response length: {len(raw_text)} characters")
        except Exception as api_error:
            logger.error(f"API call failed: {str(api_error)}", exc_info=True)
            raise
        if not raw_text.strip():
            self.add_log("Model returned an empty response", "error")
            raise ValueError("Failed to parse AI response: model returned no text")
        # Parse response
        logger.info("Parsing API response...")
        logger.debug(f"Raw API response (first 500 chars): {raw_text[:500]}")
        spec_text = self.clean_json_output(raw_text)
        logger.debug(f"Cleaned JSON (first 500 chars): {spec_text[:500]}")
        try:
            logger.info("Parsing JSON specification...")
            spec_data = json.loads(spec_text)
            spec = MathSpec(spec_data)
            logger.info(f"✓ Concept Title: {spec.concept_title}")
            logger.info(f"✓ Educational Goal: {spec.educational_goal}")
            logger.info(f"✓ Number of Steps: {len(spec.steps)}")
            logger.info(f"Visual Elements: {spec.visual_spec.get('elements', [])}")
            logger.info("STAGE 1: COMPLETE")
            logger.info("="*60)
            self.add_log(f"✓ Concept Identified: {spec.concept_title}", "success")
            self.add_log(f"Planned {len(spec.steps)} interactive steps", "info")
            return spec, self.process_logs
        except json.JSONDecodeError as e:
            logger.error(f"JSON Parse Error: {str(e)}", exc_info=True)
            logger.error(f"Failed response text (first 1000 chars): {spec_text[:1000]}")
            logger.error(f"Failed response text (around error position): {spec_text[max(0, e.pos-100):min(len(spec_text), e.pos+100)]}")
            self.add_log(f"JSON Parse Error: {str(e)}", "error")
            # Save the problematic response for debugging
            debug_file = Config.LIBRARY_PATH / f"debug_response_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
            with open(debug_file, 'w', encoding='utf-8') as f:
                f.write(f"Error: {str(e)}\n")
                f.write(f"Position: {e.pos}\n")
                f.write("="*60 + "\n")
                f.write(spec_text)
            logger.error(f"Full response saved to: {debug_file}")
            # Chain the cause so the traceback keeps the JSON error position.
            raise ValueError(f"Failed to parse AI response: {str(e)}") from e

    def stage2_generate_code(self, spec: "MathSpec", feedback: str = "") -> Tuple[str, List[dict]]:
        """
        Stage 2: Code Generation (Gemini 3 Pro Preview)
        Generates the complete HTML5 application

        Args:
            spec: Specification produced by stage 1.
            feedback: Optional refinement request appended to the prompt.

        Returns:
            ``(html_code, process_logs)`` with wrapping markdown fences removed.

        Raises:
            ValueError: The model returned no text.
            Exception: Whatever the API client raises is logged and re-raised.
        """
        logger.info("="*60)
        logger.info("STAGE 2: CODE GENERATION - Starting Gemini 3 Pro Preview call")
        logger.info(f"Concept: {spec.concept_title}")
        logger.info(f"Steps to Implement: {len(spec.steps)}")
        if feedback:
            logger.info(f"User Feedback: {feedback}")
        self.add_log("Stage 2: Engineering simulation with Gemini 3 Pro Preview (Thinking Enabled)...", "thinking")
        system_instruction = """You are an expert Senior Frontend Engineer specializing in Educational Technology.
Your task is to write a SINGLE, self-contained HTML file that implements the provided "Step-by-Step Visual Proof".
Rules:
1. The file must include all HTML, CSS, and JavaScript internally.
2. Use HTML5 Canvas API or SVG for graphics.
3. Design: Modern, dark theme (background #0f172a, text #e2e8f0).
4. **Interaction**: Implement a "Step Navigation" system.
- Include "Previous" and "Next" buttons.
- Display the current Step Title and Instruction.
- The visualization must change state based on the current step.
5. Ensure math logic is accurate.
6. Do NOT include markdown blocks. Return raw code only.
7. Handle resize events.
8. **VISUAL CLARITY - CRITICAL**:
- PREVENT OVERLAPPING ELEMENTS.
- Use a standard HTML layout (Flexbox/Grid) to separate the Canvas/SVG area from the Controls/Instructions.
"""
        # Built with .get()/str() so a spec lacking keys (URL mode has no
        # response schema) cannot abort prompt construction.
        steps_block = self._format_steps(spec.steps)
        elements = self._join_items(spec.visual_spec.get('elements', []))
        interactions = self._join_items(spec.visual_spec.get('interactions', []))
        feedback_block = f"USER FEEDBACK / REFINEMENT REQUEST: {feedback}" if feedback else ""
        coding_prompt = f"""
Implement the following Step-by-Step Math App:
Concept: {spec.concept_title}
Goal: {spec.educational_goal}
Steps to Implement (State Machine):
{steps_block}
Technical Requirements:
- Visual Elements: {elements}
- Interactions: {interactions}
- Math Logic: {spec.visual_spec.get('mathLogic', '')}
{feedback_block}
Generate the full index.html content now.
"""
        logger.info("Sending API request to Gemini 3 Pro Preview...")
        try:
            response = self.client.models.generate_content(
                model="gemini-3-pro-preview",
                contents=coding_prompt,
                config=types.GenerateContentConfig(
                    system_instruction=system_instruction,
                    thinking_config=types.ThinkingConfig(
                        include_thoughts=True,
                        thinking_budget=-1
                    )
                )
            )
            # ``text`` is None when the reply carries no text part.
            raw_code = response.text or ""
            logger.info("✓ API response received successfully")
            logger.debug(f"Response length: {len(raw_code)} characters")
        except Exception as api_error:
            logger.error(f"API call failed: {str(api_error)}", exc_info=True)
            raise
        if not raw_code.strip():
            self.add_log("Model returned an empty response", "error")
            raise ValueError("Code generation failed: model returned no text")
        # Only the wrapping fence is removed; backticks inside the page stay.
        code = self._strip_code_fences(raw_code)
        logger.info(f"Generated HTML code length: {len(code)} characters")
        logger.info(f"Code starts with: {code[:100]}...")
        logger.info("STAGE 2: COMPLETE")
        logger.info("="*60)
        self.add_log("✓ Code generated successfully", "success")
        return code, self.process_logs

    def _get_math_spec_schema(self):
        """Get JSON schema for MathSpec (the stage 1 response schema)."""
        return types.Schema(
            type=types.Type.OBJECT,
            properties={
                "conceptTitle": types.Schema(type=types.Type.STRING),
                "educationalGoal": types.Schema(type=types.Type.STRING),
                "explanation": types.Schema(type=types.Type.STRING),
                "steps": types.Schema(
                    type=types.Type.ARRAY,
                    items=types.Schema(
                        type=types.Type.OBJECT,
                        properties={
                            "stepTitle": types.Schema(type=types.Type.STRING),
                            "instruction": types.Schema(type=types.Type.STRING),
                            "visualFocus": types.Schema(type=types.Type.STRING)
                        },
                        required=["stepTitle", "instruction", "visualFocus"]
                    )
                ),
                "visualSpec": types.Schema(
                    type=types.Type.OBJECT,
                    properties={
                        "elements": types.Schema(type=types.Type.ARRAY, items=types.Schema(type=types.Type.STRING)),
                        "interactions": types.Schema(type=types.Type.ARRAY, items=types.Schema(type=types.Type.STRING)),
                        "mathLogic": types.Schema(type=types.Type.STRING)
                    },
                    required=["elements", "interactions", "mathLogic"]
                )
            },
            required=["conceptTitle", "educationalGoal", "explanation", "steps", "visualSpec"]
        )
# ==================== Library Management ====================
class ProofLibrary:
    """Manage saved proofs stored as JSON files under ``Config.LIBRARY_PATH``."""

    @staticmethod
    def _slugify(title: str, separator: str = "_") -> str:
        """Turn a concept title into a safe, lower-case file-name fragment.

        Titles come from the model, so path separators, dots and punctuation
        are collapsed to ``separator`` to keep the file inside the library
        folder. An empty result becomes "untitled".
        """
        slug = re.sub(r"[^\w-]+", separator, (title or "").lower())
        return slug.strip("_-") or "untitled"

    @staticmethod
    def _write_json(filepath: Path, payload: dict) -> None:
        """Serialise ``payload`` fully before opening the file.

        A serialisation error therefore cannot leave a truncated JSON file.
        """
        text = json.dumps(payload, indent=2)
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(text)

    @staticmethod
    def save_proof(spec: "MathSpec", code: str, input_data: dict) -> str:
        """Save a proof to the library and return the path of the new file.

        The file name is ``<YYYYmmdd_HHMMSS>_<title slug>.json``.
        ``input_data`` must be JSON-serialisable.
        """
        now = datetime.now()
        proof_id = now.strftime("%Y%m%d_%H%M%S")
        filename = f"{proof_id}_{ProofLibrary._slugify(spec.concept_title, '_')}.json"
        filepath = Config.LIBRARY_PATH / filename
        proof_data = {
            "id": proof_id,
            "timestamp": now.isoformat(),
            "conceptTitle": spec.concept_title,
            "input": input_data,
            "concept": spec.to_dict(),
            "sourceCode": code
        }
        ProofLibrary._write_json(filepath, proof_data)
        return str(filepath)

    @staticmethod
    def load_proof(filepath: str) -> dict:
        """Load a proof from the library and return the parsed JSON content."""
        with open(filepath, 'r', encoding='utf-8') as f:
            return json.load(f)

    @staticmethod
    def list_proofs() -> List[Tuple[str, str]]:
        """List all saved proofs as ``(filepath, "title (timestamp)")`` pairs.

        Every ``*.json`` file in the library folder is considered; files that
        cannot be read or parsed are skipped with a warning. The result is
        sorted by file path, descending.
        """
        proofs = []
        for filepath in Config.LIBRARY_PATH.glob("*.json"):
            try:
                with open(filepath, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                title = data.get("conceptTitle", filepath.stem)
                timestamp = data.get("timestamp", "")
                proofs.append((str(filepath), f"{title} ({timestamp})"))
            except Exception as exc:
                # Best effort: one bad file must not hide the rest.
                logger.warning("Skipping unreadable proof file %s: %s", filepath, exc)
                continue
        return sorted(proofs, key=lambda x: x[0], reverse=True)

    @staticmethod
    def export_proof(spec: "MathSpec", code: str, input_data: dict) -> str:
        """Export proof to downloadable JSON and return the path of the file.

        The file is written to the library folder as
        ``visual-proof-<title slug>.json``; an earlier export with the same
        title is overwritten.
        """
        export_data = {
            "appName": "StepWise Math Export",
            "exportedAt": datetime.now().isoformat(),
            "input": input_data,
            "concept": spec.to_dict(),
            "sourceCode": code
        }
        filename = f"visual-proof-{ProofLibrary._slugify(spec.concept_title, '-')}.json"
        filepath = Config.LIBRARY_PATH / filename
        ProofLibrary._write_json(filepath, export_data)
        return str(filepath)
# ==================== Gradio Application ====================
class StepWiseMathApp:
"""Main Gradio application"""
def __init__(self):
self.current_spec: Optional[MathSpec] = None
self.current_code: str = ""
self.current_logs: List[dict] = []
self.api_key: str = Config.DEFAULT_API_KEY
@staticmethod
def wrap_html_for_iframe(html_code: str) -> str:
"""Wrap HTML code in an iframe for proper rendering in Gradio"""
if not html_code or not html_code.strip():
return ""
# Escape only quotes for srcdoc attribute - do NOT escape HTML tags
# We need to preserve HTML structure but escape the quotes for attribute value
escaped_html = html_code.replace('\\', '\\\\').replace('"', '"')
# Create iframe with the HTML
iframe_html = f'''<iframe
style="width: 100%; height: 600px; border: none; border-radius: 8px;"
srcdoc="{escaped_html}"
sandbox="allow-scripts allow-same-origin"
></iframe>'''
return iframe_html
def _generate_proof_internal(self, text_input: str = "", url_input: str = "",
image_input: Any = None, input_mode: str = "text",
api_key: str = "") -> Tuple[str, str, str, str, str]:
"""Internal method for generating proofs - shared logic for all three MCP tools"""
try:
logger.info("\n" + "#"*60)
logger.info("# GENERATE_PROOF INITIATED")
logger.info("#"*60)
# Validate inputs
logger.info(f"Input Validation - Mode: {input_mode}")
if input_mode == "text" and not text_input.strip():
logger.warning("Validation failed: Empty text input")
return "", "", "❌ Error: Please enter a math problem description", "", ""
elif input_mode == "url" and not url_input.strip():
logger.warning("Validation failed: Empty URL input")
return "", "", "❌ Error: Please enter a valid URL", "", ""
elif input_mode == "image" and image_input is None:
logger.warning("Validation failed: No image provided")
return "", "", "❌ Error: Please upload an image", "", ""
logger.info("✓ Input validation passed")
# Use provided API key or default
logger.info("Checking API key configuration...")
key = api_key.strip() if api_key.strip() else Config.DEFAULT_API_KEY
if not key:
logger.error("No API key configured")
return "", "", "❌ Error: No API key configured. Please set GEMINI_API_KEY or provide one.", "", ""
logger.info("✓ API key found")
self.api_key = key
pipeline = GeminiPipeline(key)
logger.info("Pipeline initialized")
start_time = time.time()
# Stage 1: Analyze concept
logger.info("\nExecuting STAGE 1: Concept Analysis...")
self.current_spec, logs = pipeline.stage1_analyze_concept(
input_text=text_input,
input_url=url_input,
input_image=image_input,
input_mode=input_mode
)
logger.info(f"✓ Stage 1 complete - Concept: {self.current_spec.concept_title}")
# Stage 2: Generate code
logger.info("\nExecuting STAGE 2: Code Generation...")
self.current_code, logs = pipeline.stage2_generate_code(self.current_spec)
self.current_logs = logs
logger.info(f"✓ Stage 2 complete - Generated {len(self.current_code)} characters of HTML")
# Format outputs
elapsed = time.time() - start_time
# Concept details
concept_md = f"""# {self.current_spec.concept_title}
**Educational Goal:** {self.current_spec.educational_goal}
## Explanation
{self.current_spec.explanation}
## Proof Steps
"""
for i, step in enumerate(self.current_spec.steps, 1):
concept_md += f"### Step {i}: {step['stepTitle']}\n\n"
concept_md += f"**Instruction:** {step['instruction']}\n\n"
concept_md += f"**Visual Focus:** {step['visualFocus']}\n\n"
# Logs
logs_text = "\n".join([f"[{log['timestamp']}] {log['message']}" for log in logs])
logs_text += f"\n\n✓ Process Complete in {elapsed:.2f}s"
# Status
status = f"✅ Generated '{self.current_spec.concept_title}' successfully in {elapsed:.2f}s"
logger.info(f"\n✓ GENERATE_PROOF COMPLETED SUCCESSFULLY")
logger.info(f" - Concept: {self.current_spec.concept_title}")
logger.info(f" - Steps: {len(self.current_spec.steps)}")
logger.info(f" - HTML Size: {len(self.current_code)} bytes")
logger.info(f" - Total Time: {elapsed:.2f}s")
logger.info("#"*60 + "\n")
# Wrap HTML in iframe for proper rendering
rendered_html = self.wrap_html_for_iframe(self.current_code)
return rendered_html, concept_md, status, logs_text, self.current_code
except Exception as e:
error_msg = f"❌ Error: {str(e)}"
logger.error(f"GENERATE_PROOF FAILED: {str(e)}", exc_info=True)
logger.error("#"*60 + "\n")
return "", "", error_msg, str(e), ""
def create_math_specification_from_text(self, text_input: str, api_key: str = "") -> str:
"""
Creates a structured mathematical teaching specification from a natural language problem description.
This is the first step in creating an interactive visual proof. Use this when you have a text description of a math problem
and need to transform it into a pedagogical framework with step-by-step learning objectives. Returns a JSON specification
that can be passed to build_interactive_proof_from_specification.
Args:
text_input (str): Natural language description of the mathematical problem or theorem to analyze.
Example: "Prove that the sum of angles in a triangle equals 180 degrees" or
"Explain the Pythagorean theorem visually".
api_key (str, optional): Google Gemini API key for authentication. If empty or not provided, falls back to the GEMINI_API_KEY environment variable. Defaults to "".
Returns:
str: A JSON-formatted string containing the complete mathematical specification with fields:
- conceptTitle: The name of the mathematical concept
- educationalGoal: Learning objective for students
- explanation: Detailed markdown explanation
- steps: Array of interactive teaching steps
- visualSpec: Technical requirements for visualization
Returns a JSON object with an "error" field if the creation fails.
"""
try:
key = api_key.strip() if api_key.strip() else Config.DEFAULT_API_KEY
if not key:
return json.dumps({"error": "No API key configured"})
if not text_input.strip():
return json.dumps({"error": "Empty text input"})
pipeline = GeminiPipeline(key)
spec, logs = pipeline.stage1_analyze_concept(input_text=text_input, input_mode="text")
self.current_spec = spec
return json.dumps(spec.to_dict(), indent=2)
except Exception as e:
return json.dumps({"error": str(e)})
def create_math_specification_from_url(self, url_input: str, api_key: str = "") -> str:
"""
Creates a structured mathematical teaching specification from a web URL containing a math problem.
This is the first step in creating an interactive visual proof. Use this when you have a webpage, article, or
online resource containing a math problem that needs to be transformed into an educational framework.
The tool fetches and analyzes the content from the URL automatically. Returns a JSON specification that can be
passed to build_interactive_proof_from_specification.
Args:
url_input (str): Complete URL pointing to a webpage or resource containing the mathematical problem.
Example: "https://en.wikipedia.org/wiki/Pythagorean_theorem" or
"https://mathworld.wolfram.com/Circle.html". Must be a valid http:// or https:// URL.
api_key (str, optional): Google Gemini API key for authentication. If empty or not provided, falls back to the GEMINI_API_KEY environment variable. Defaults to "".
Returns:
str: A JSON-formatted string containing the complete mathematical specification with fields:
- conceptTitle: The name of the mathematical concept
- educationalGoal: Learning objective for students
- explanation: Detailed markdown explanation
- steps: Array of interactive teaching steps
- visualSpec: Technical requirements for visualization
Returns a JSON object with an "error" field if the URL is invalid or creation fails.
"""
try:
key = api_key.strip() if api_key.strip() else Config.DEFAULT_API_KEY
if not key:
return json.dumps({"error": "No API key configured"})
if not url_input.strip():
return json.dumps({"error": "Empty URL input"})
pipeline = GeminiPipeline(key)
spec, logs = pipeline.stage1_analyze_concept(input_url=url_input, input_mode="url")
self.current_spec = spec
return json.dumps(spec.to_dict(), indent=2)
except Exception as e:
return json.dumps({"error": str(e)})
def create_math_specification_from_image(self, image_input: Any, api_key: str = "") -> str:
"""
Creates a structured mathematical teaching specification from an uploaded image containing a math problem.
This is the first step in creating an interactive visual proof. Use this when you have a photo, screenshot, or
diagram of a math problem (from textbooks, whiteboards, handwritten notes, etc.) that needs to be interpreted
and transformed into an educational framework. The AI performs optical character recognition and mathematical
reasoning on the image. Returns a JSON specification that can be passed to build_interactive_proof_from_specification.
Args:
image_input (Any): A PIL Image object containing the mathematical problem. The image should clearly show
the problem text, equations, or diagrams. Supported formats include JPEG, PNG, and other
common image formats. For best results, ensure good lighting and contrast.
api_key (str, optional): Google Gemini API key for authentication. If empty or not provided, falls back to the GEMINI_API_KEY environment variable. Defaults to "".
Returns:
str: A JSON-formatted string containing the complete mathematical specification with fields:
- conceptTitle: The name of the mathematical concept
- educationalGoal: Learning objective for students
- explanation: Detailed markdown explanation
- steps: Array of interactive teaching steps
- visualSpec: Technical requirements for visualization
Returns a JSON object with an "error" field if the image cannot be processed or creation fails.
"""
try:
key = api_key.strip() if api_key.strip() else Config.DEFAULT_API_KEY
if not key:
return json.dumps({"error": "No API key configured"})
if image_input is None:
return json.dumps({"error": "No image provided"})
pipeline = GeminiPipeline(key)
spec, logs = pipeline.stage1_analyze_concept(input_image=image_input, input_mode="image")
self.current_spec = spec
return json.dumps(spec.to_dict(), indent=2)
except Exception as e:
return json.dumps({"error": str(e)})
def build_interactive_proof_from_specification(self, specification_json: str, api_key: str = "") -> str:
"""
Builds a complete, self-contained HTML/JavaScript application from a mathematical teaching specification.
This is the second step in creating an interactive visual proof. Use this after obtaining a specification
from any of the create_math_specification_from_* methods. The tool produces production-ready code with step-by-step navigation,
interactive visualizations using Canvas/SVG, and a modern dark-themed UI. The output is a single HTML file with all
CSS and JavaScript embedded, ready to be saved and opened in any web browser.
Args:
specification_json (str): A JSON-formatted string containing the mathematical specification. This should be the exact
output from create_math_specification_from_text, create_math_specification_from_url, or create_math_specification_from_image.
The JSON must include: conceptTitle, educationalGoal, explanation, steps array, and visualSpec.
Example: '{"conceptTitle": "Pythagorean Theorem", "steps": [...], ...}'.
api_key (str, optional): Google Gemini API key for authentication. If empty or not provided,
falls back to the GEMINI_API_KEY environment variable or the key used in the
previous specification creation step. Defaults to "".
Returns:
str: A complete HTML document as a string, containing all HTML structure, CSS styling, and JavaScript code
needed for the interactive mathematical proof. The code includes step navigation buttons, dynamic
visualizations, and responsive design. Returns an error message string (starting with "Error:") if
the specification JSON is invalid or build fails.
"""
try:
key = api_key.strip() if api_key.strip() else Config.DEFAULT_API_KEY
if not key:
return "Error: No API key configured"
# Parse the specification JSON
concept_data = json.loads(specification_json)
if "error" in concept_data:
return f"Error: {concept_data['error']}"
spec = MathSpec(concept_data)
self.current_spec = spec
pipeline = GeminiPipeline(key)
code, logs = pipeline.stage2_generate_code(spec)
self.current_code = code
return code
except json.JSONDecodeError as e:
return f"Error: Invalid concept JSON - {str(e)}"
except Exception as e:
return f"Error: {str(e)}"
def generate_proof(self, text_input: str = "", url_input: str = "",
image_input: Any = None, input_mode: str = "Text",
api_key: str = "") -> Tuple[str, str, str, str, str]:
"""
Generate a guided, interactive visual proof (UI version with mode selector).
This method is for the Gradio UI and delegates to the appropriate specific method.
"""
mode_map = {"Text": "text", "Image": "image", "URL": "url"}
return self._generate_proof_internal(
text_input=text_input,
url_input=url_input,
image_input=image_input,
input_mode=mode_map.get(input_mode, "text"),
api_key=api_key
)
def refine_proof(self, feedback: str, api_key: str = "") -> Tuple[str, str, str, str]:
"""Refine the current proof based on feedback"""
if not self.current_spec or not feedback.strip():
rendered_html = self.wrap_html_for_iframe(self.current_code)
return rendered_html, "⚠️ No proof loaded or no feedback provided", "", self.current_code
try:
key = api_key.strip() if api_key.strip() else self.api_key
pipeline = GeminiPipeline(key)
# Regenerate with feedback
self.current_code, logs = pipeline.stage2_generate_code(
self.current_spec,
feedback=feedback
)
logs_text = "\n".join([f"[{log['timestamp']}] {log['message']}" for log in logs])
status = f"✅ Refinement applied based on feedback"
# Wrap HTML in iframe for proper rendering
rendered_html = self.wrap_html_for_iframe(self.current_code)
return rendered_html, status, logs_text, self.current_code
except Exception as e:
rendered_html = self.wrap_html_for_iframe(self.current_code)
return rendered_html, f"❌ Refinement failed: {str(e)}", str(e), self.current_code
def save_to_library(self, text_input: str, url_input: str,
image_input: Any, input_mode: str) -> str:
"""Save current proof to library"""
if not self.current_spec or not self.current_code:
return "⚠️ No proof to save"
try:
input_data = {
"mode": input_mode.lower(),
"text": text_input if input_mode == "Text" else None,
"url": url_input if input_mode == "URL" else None,
"image": image_input if input_mode == "Image" else None
}
filepath = ProofLibrary.save_proof(
self.current_spec,
self.current_code,
input_data
)
return f"✅ Proof saved to library: {filepath}"
except Exception as e:
return f"❌ Save failed: {str(e)}"
def export_proof_file(self, text_input: str, url_input: str,
image_input: Any, input_mode: str) -> Tuple[str, str]:
"""Export proof as downloadable file"""
if not self.current_spec or not self.current_code:
return None, "⚠️ No proof to export"
try:
input_data = {
"mode": input_mode.lower(),
"text": text_input if input_mode == "Text" else None,
"url": url_input if input_mode == "URL" else None
}
filepath = ProofLibrary.export_proof(
self.current_spec,
self.current_code,
input_data
)
return filepath, f"✅ Proof exported: {filepath}"
except Exception as e:
return None, f"❌ Export failed: {str(e)}"
def load_from_library(self, filepath: str) -> Tuple[str, str, str, str, str]:
"""Load a proof from library"""
if not filepath:
return "", "", "", "⚠️ No proof selected", ""
try:
data = ProofLibrary.load_proof(filepath)
self.current_spec = MathSpec(data["concept"])
self.current_code = data["sourceCode"]
# Format concept
concept_md = f"""# {self.current_spec.concept_title}
**Educational Goal:** {self.current_spec.educational_goal}
## Explanation
{self.current_spec.explanation}
"""
# Wrap HTML in iframe for proper rendering
rendered_html = self.wrap_html_for_iframe(self.current_code)
return (
rendered_html,
concept_md,
f"✅ Loaded '{self.current_spec.concept_title}' from library",
"",
self.current_code
)
except Exception as e:
return "", "", f"❌ Load failed: {str(e)}", "", ""
def load_example(self, example_name: str) -> Tuple[str, str, str, str, str]:
    """Load one of the pre-built example proofs by its display name.

    Args:
        example_name: Display name of the example, e.g. "Pythagorean Theorem".

    Returns:
        A 5-tuple in the order ``(rendered HTML, concept markdown,
        status message, process logs, source code)``. When the example is
        unknown or its file is missing, the content slots are empty and the
        status slot carries the warning.
    """
    example_files = {
        "Probability of an Odd Sum": "001-visual-proof-probability-of-an-odd-sum.json",
        "Pythagorean Theorem": "002-visual-proof-pythagorean-theorem.json",
        "Orthodiagonal Quads": "003-visual-proof-area-of-quadrilaterals-with-perpendicular-diagonals.json",
    }
    filename = example_files.get(example_name)
    if filename is None:
        # Warnings go in the status slot (third), matching load_from_library's
        # success and error results; they used to land in the logs slot.
        return "", "", "⚠️ Example not found", "", ""
    filepath = Config.EXAMPLES_PATH / filename
    if not filepath.exists():
        # Try React app examples folder
        filepath = Path("../react-app/public/examples") / filename
        if not filepath.exists():
            return "", "", f"⚠️ Example file not found: {filepath}", "", ""
    return self.load_from_library(str(filepath))
# ==================== MCP Prompts & Resources ====================
# MCP Prompt functions using @gr.mcp.prompt() decorator
@gr.mcp.prompt()
def create_visual_math_proof(problem_description: str, input_type: str = "text") -> str:
    """Create a complete interactive visual proof for any math problem in two steps.

    This prompt guides you through the two-step workflow:
    1. Create a mathematical specification from your input
    2. Build an interactive HTML/JS proof application

    Args:
        problem_description: The mathematical problem, theorem, or concept to visualize
        input_type: Type of input - 'text' for natural language, 'url' for web resources,
            or 'image' for uploaded pictures. Matching ignores case and surrounding
            whitespace; empty or unrecognized values fall back to 'text'.

    Returns:
        A structured prompt for creating the visual proof
    """
    spec_tools = {
        "text": "create_math_specification_from_text",
        "url": "create_math_specification_from_url",
        "image": "create_math_specification_from_image",
    }
    # Normalize before the lookup so values such as "URL" or " Image " select
    # the matching tool instead of silently falling back to the text tool.
    lookup_key = str(input_type or "text").strip().lower()
    tool_name = spec_tools.get(lookup_key, spec_tools["text"])
    # The prompt echoes the caller's original input_type, not the normalized key.
    return f"""Please create an interactive visual proof for this mathematical concept: {problem_description}
Follow this two-step process:
**Step 1: Create Specification**
Use the appropriate tool based on input type '{input_type}':
- For text: {spec_tools["text"]}
- For URL: {spec_tools["url"]}
- For image: {spec_tools["image"]}
Recommended tool for this request: {tool_name}
**Step 2: Build Interactive Proof**
Once you have the JSON specification, use:
- build_interactive_proof_from_specification
The result will be a complete, self-contained HTML application with:
- Step-by-step navigation
- Interactive visualizations
- Real-time mathematical updates
- Modern dark-themed UI
"""
@gr.mcp.prompt()
def create_math_specification(problem_input: str, educational_focus: str = "step-by-step visual understanding") -> str:
    """Analyze a math problem and create a pedagogical specification with teaching steps.

    Produces a prompt asking for a detailed teaching plan that breaks a
    mathematical concept down into interactive learning steps.

    Args:
        problem_input: The mathematical problem as text, URL, or image description
        educational_focus: Specific learning objectives or teaching approach to emphasize

    Returns:
        A structured prompt for specification creation
    """
    # Only the first two lines depend on the arguments.
    request_header = (
        f"Please analyze this mathematical problem and create a pedagogical specification: {problem_input}\n"
        f"Educational Focus: {educational_focus}\n"
    )
    # The rest of the prompt is fixed text listing the required fields.
    required_fields = """The specification should include:
1. **Concept Title**: Clear name of the mathematical concept
2. **Educational Goal**: What students should learn
3. **Explanation**: Detailed markdown explanation
4. **Steps**: 3-6 interactive teaching steps, each with:
- Step title and instruction
- Visual focus (what changes in the visualization)
5. **Visual Spec**: Technical requirements including:
- Visual elements needed (shapes, graphs, etc.)
- Interactions (drag, click, sliders)
- Mathematical logic and formulas
Use create_math_specification_from_text, create_math_specification_from_url, or create_math_specification_from_image based on your input type.
"""
    return request_header + required_fields
@gr.mcp.prompt()
def build_from_specification(specification: str, customization: str = "standard") -> str:
    """Build an interactive HTML/JS application from a math teaching specification.

    Produces a prompt that asks for a production-ready interactive proof
    application generated from a mathematical specification JSON.

    Args:
        specification: JSON specification from create_math_specification_from_* tools
        customization: Additional visual or interactive customizations to apply

    Returns:
        A structured prompt for building the application
    """
    # Argument-dependent opening: the specification and customization request.
    request_header = (
        "Please build an interactive proof application from this specification:\n"
        f"{specification}\n"
        f"Customization requests: {customization}\n"
    )
    # Fixed text describing the tool to call and the expected output.
    build_requirements = """Use the build_interactive_proof_from_specification tool to generate a complete HTML/JavaScript application with:
- Self-contained single file (all CSS/JS embedded)
- Step navigation system (Previous/Next buttons)
- Interactive Canvas/SVG visualizations
- Real-time mathematical updates
- Modern dark theme (#0f172a background, #e2e8f0 text)
- Responsive design that prevents overlapping elements
- Clear separation of controls, text, and diagrams
The output will be ready to save as an .html file and open in any browser.
"""
    return request_header + build_requirements
# MCP Resource functions using @gr.mcp.resource() decorator
@gr.mcp.resource("stepwise://specification-template", mime_type="application/json")
def get_specification_template() -> str:
    """JSON template for mathematical teaching specifications.

    Returns the standard structure for math concept specifications, serialized
    as indented JSON, for use with build_interactive_proof_from_specification.
    """
    # A single placeholder step shows the shape of each entry in "steps".
    step_template = {
        "stepTitle": "Step name",
        "instruction": "What the student should do or observe",
        "visualFocus": "What part of the visualization changes",
    }
    visual_spec_template = {
        "elements": ["List of visual objects needed"],
        "interactions": ["User actions like drag, click, slider"],
        "mathLogic": "Formulas and calculations needed",
    }
    template = {
        "conceptTitle": "Name of the mathematical concept",
        "educationalGoal": "What students should learn from this proof",
        "explanation": "Detailed markdown explanation of the concept",
        "steps": [step_template],
        "visualSpec": visual_spec_template,
    }
    return json.dumps(template, indent=2)
@gr.mcp.resource("stepwise://example-pythagorean", mime_type="application/json")
def get_pythagorean_example() -> str:
    """Complete example of Pythagorean theorem visual proof specification.

    Reads the bundled Pythagorean theorem example and returns its "concept"
    section as indented JSON. If the example file is missing, returns a JSON
    object with an "error" message instead.
    """
    example_path = Config.EXAMPLES_PATH / "002-visual-proof-pythagorean-theorem.json"
    if not example_path.exists():
        return json.dumps({"error": "Example file not found"})
    with open(example_path, 'r', encoding='utf-8') as handle:
        payload = json.load(handle)
    # Only the specification part is exposed; an absent key yields "{}".
    concept = payload.get("concept", {})
    return json.dumps(concept, indent=2)
@gr.mcp.resource("stepwise://example-probability", mime_type="application/json")
def get_probability_example() -> str:
    """Complete example of probability concept visualization.

    Reads the bundled "probability of an odd sum" example and returns its
    "concept" section as indented JSON. If the example file is missing,
    returns a JSON object with an "error" message instead.
    """
    example_path = Config.EXAMPLES_PATH / "001-visual-proof-probability-of-an-odd-sum.json"
    if not example_path.exists():
        return json.dumps({"error": "Example file not found"})
    with open(example_path, 'r', encoding='utf-8') as handle:
        payload = json.load(handle)
    # Only the specification part is exposed; an absent key yields "{}".
    concept = payload.get("concept", {})
    return json.dumps(concept, indent=2)
@gr.mcp.resource("stepwise://workflow-guide", mime_type="text/markdown")
def get_workflow_guide() -> str:
    """Guide for using the two-step process: specification creation → proof building.

    Returns the Markdown documentation for the StepWise Math workflow: the
    tools for each step, an example call sequence, and usage tips.
    """
    # Static Markdown; nothing in it depends on runtime state.
    guide_markdown = """# StepWise Math: Two-Step Workflow Guide
## Overview
StepWise Math uses a two-step process to create interactive visual proofs:
### Step 1: Create Mathematical Specification
Use one of these tools based on your input type:
- `create_math_specification_from_text` - For natural language descriptions
- `create_math_specification_from_url` - For web resources (Wikipedia, Khan Academy, etc.)
- `create_math_specification_from_image` - For photos/screenshots of problems
**Output**: JSON specification with teaching steps and visual requirements
**Processing time**: ~10-15 seconds
### Step 2: Build Interactive Proof
Use the specification from Step 1:
- `build_interactive_proof_from_specification` - Takes the JSON specification
**Output**: Complete HTML/JavaScript application
**Processing time**: ~20-30 seconds
## Example Workflow
```python
# Step 1: Create specification from text
specification = create_math_specification_from_text(
text_input="Prove that the angles in a triangle sum to 180 degrees"
)
# Step 2: Build interactive proof
html_code = build_interactive_proof_from_specification(
specification_json=specification
)
# Result: Save or display the HTML application
```
## Tips
- Each tool can work independently or as part of the two-step pipeline
- Specifications are reusable - create once, build multiple times with different customizations
- All tools support optional API key parameter for using custom Gemini API keys
- The specification includes 3-6 interactive teaching steps
- Generated apps include step navigation, interactive visualizations, and real-time updates
"""
    return guide_markdown
# ==================== Gradio Interface ====================
def create_gradio_app():
"""Create and configure the Gradio interface.

Instantiates StepWiseMathApp, preloads the "Probability of an Odd Sum"
example as the initial content, lays out the input and output panels,
wires the UI event handlers, attaches the MCP tool handlers to hidden
components, and registers the MCP prompt/resource functions via gr.api.

Returns:
The configured gr.Blocks instance. It is not launched here.
"""
app = StepWiseMathApp()
# Load default example on initialization
# load_example returns (html, concept markdown, status, logs, source code);
# these values seed the output components created below.
default_example = "Probability of an Odd Sum"
default_html, default_concept, default_status, default_logs, default_code = app.load_example(default_example)
with gr.Blocks(
title="StepWise Math - Gradio Edition"
) as demo:
# Header
gr.HTML("""
<div class="main-header">
<h1>🎓 StepWise Math</h1>
<p style="font-size: 1.2rem; margin-top: 0.5rem;">Transform Static Math Problems into Living, Interactive Step-by-Step Visual Proofs</p>
<p style="opacity: 0.9; margin-top: 0.5rem;">Powered by Google Gemini 2.5 Flash & Gemini 3.0 Pro with Extended Thinking</p>
</div>
""")
with gr.Row():
# Left Panel - Input
with gr.Column(scale=1):
gr.Markdown("## 📝 Input Method")
input_mode = gr.Radio(
["Text", "Image", "URL"],
value="Text",
label="Select Input Type"
)
# One widget per input mode; only the text box starts visible.
with gr.Group():
text_input = gr.Textbox(
label="Describe the Math Problem",
placeholder="e.g., Prove that the sum of angles in a triangle is 180 degrees...",
lines=5,
visible=True
)
image_input = gr.Image(
label="Upload Problem Image",
type="pil",
visible=False
)
url_input = gr.Textbox(
label="Enter Resource URL",
placeholder="https://example.com/math-problem",
visible=False
)
# Toggle visibility based on mode
# Exactly one of the three input widgets is made visible for a given mode.
def update_inputs(mode):
return {
text_input: gr.update(visible=mode == "Text"),
image_input: gr.update(visible=mode == "Image"),
url_input: gr.update(visible=mode == "URL")
}
input_mode.change(
update_inputs,
input_mode,
[text_input, image_input, url_input],
api_visibility="private"
)
generate_btn = gr.Button("🚀 Generate Guided Proof", variant="primary", size="lg")
gr.Markdown("---")
gr.Markdown("## ⚙️ Configuration")
api_key_input = gr.Textbox(
label="Gemini API Key (Optional)",
placeholder="Leave empty to use environment variable",
type="password"
)
gr.Markdown("---")
gr.Markdown("## 📚 Examples")
example_selector = gr.Dropdown(
choices=["Probability of an Odd Sum", "Pythagorean Theorem", "Orthodiagonal Quads"],
value=default_example, # Set default selected example
label="Load Example",
interactive=True
)
load_example_btn = gr.Button("Load Example")
# gr.Markdown("---")
# gr.Markdown("## 💾 Library")
# library_selector = gr.Dropdown(
# choices=ProofLibrary.list_proofs(),
# label="Saved Proofs",
# interactive=True
# )
# refresh_library_btn = gr.Button("🔄 Refresh")
# load_library_btn = gr.Button("Load from Library")
# Right Panel - Output
with gr.Column(scale=2):
status_display = gr.Markdown(default_status, elem_classes="status-box")
with gr.Tabs():
with gr.Tab("🎬 Guided Proof"):
html_output = gr.HTML(value=default_html, label="Interactive Simulation")
with gr.Group():
gr.Markdown("### 💬 Refinement Feedback")
feedback_input = gr.Textbox(
placeholder="e.g., 'Make the triangle red' or 'Add a step to show area'",
label="Feedback"
)
refine_btn = gr.Button("Apply Refinement")
with gr.Tab("📖 Concept Details"):
concept_output = gr.Markdown(value=default_concept)
with gr.Tab("💻 Source Code"):
code_output = gr.Code(value=default_code, language="html", label="Generated HTML/JS")
with gr.Tab("📊 Process Logs"):
logs_output = gr.Textbox(value=default_logs, label="Execution Logs", lines=20)
# with gr.Row():
# save_btn = gr.Button("💾 Save to Library")
# export_btn = gr.Button("📥 Export")
# export_file = gr.File(label="Download", visible=False)
# Hidden components for MCP tool exposure (Two-step process)
# The group is created with visible=False; its components only serve as
# inputs/outputs for the MCP tool handlers attached further down.
with gr.Group(visible=False):
# Step 1: Analyze concept from text
mcp_analyze_text_input = gr.Textbox()
mcp_analyze_text_api_key = gr.Textbox()
mcp_analyze_text_btn = gr.Button("MCP Analyze Concept from Text")
mcp_analyze_text_output = gr.Textbox()
# Step 1: Analyze concept from URL
mcp_analyze_url_input = gr.Textbox()
mcp_analyze_url_api_key = gr.Textbox()
mcp_analyze_url_btn = gr.Button("MCP Analyze Concept from URL")
mcp_analyze_url_output = gr.Textbox()
# Step 1: Analyze concept from image
mcp_analyze_image_input = gr.Image(type="pil")
mcp_analyze_image_api_key = gr.Textbox()
mcp_analyze_image_btn = gr.Button("MCP Analyze Concept from Image")
mcp_analyze_image_output = gr.Textbox()
# Step 2: Generate code from concept
mcp_generate_code_concept_json = gr.Textbox()
mcp_generate_code_api_key = gr.Textbox()
mcp_generate_code_btn = gr.Button("MCP Generate Code from Concept")
mcp_generate_code_output = gr.Textbox()
# Event handlers
# Output order here is html, concept, status, logs, code.
generate_btn.click(
fn=app.generate_proof,
inputs=[text_input, url_input, image_input, input_mode, api_key_input],
outputs=[html_output, concept_output, status_display, logs_output, code_output],
api_visibility="private" # UI-only, not exposed to MCP
)
# Refinement does not update the concept tab (no concept_output in outputs).
refine_btn.click(
fn=app.refine_proof,
inputs=[feedback_input, api_key_input],
outputs=[html_output, status_display, logs_output, code_output],
api_visibility="private" # UI-only, not exposed to MCP
)
# save_btn.click(
# fn=app.save_to_library,
# inputs=[text_input, url_input, image_input, input_mode],
# outputs=[status_display],
# api_visibility="private" # UI-only, not exposed to MCP
# )
# export_btn.click(
# fn=app.export_proof_file,
# inputs=[text_input, url_input, image_input, input_mode],
# outputs=[export_file, status_display],
# api_visibility="private" # UI-only, not exposed to MCP
# )
# load_library_btn.click(
# fn=app.load_from_library,
# inputs=[library_selector],
# outputs=[html_output, concept_output, status_display, logs_output, code_output],
# api_visibility="private" # UI-only, not exposed to MCP
# )
# refresh_library_btn.click(
# fn=lambda: gr.update(choices=ProofLibrary.list_proofs()),
# outputs=[library_selector],
# api_visibility="private" # UI-only, not exposed to MCP
# )
load_example_btn.click(
fn=app.load_example,
inputs=[example_selector],
outputs=[html_output, concept_output, status_display, logs_output, code_output],
api_visibility="private" # UI-only, not exposed to MCP
)
# MCP tool event handlers (Two-step process)
# Unlike the UI handlers above, these do not pass api_visibility="private".
# NOTE(review): presumably that is what keeps them exposed as MCP tools - confirm against the Gradio docs.
mcp_analyze_text_btn.click(
fn=app.create_math_specification_from_text,
inputs=[mcp_analyze_text_input, mcp_analyze_text_api_key],
outputs=[mcp_analyze_text_output]
)
mcp_analyze_url_btn.click(
fn=app.create_math_specification_from_url,
inputs=[mcp_analyze_url_input, mcp_analyze_url_api_key],
outputs=[mcp_analyze_url_output]
)
mcp_analyze_image_btn.click(
fn=app.create_math_specification_from_image,
inputs=[mcp_analyze_image_input, mcp_analyze_image_api_key],
outputs=[mcp_analyze_image_output]
)
mcp_generate_code_btn.click(
fn=app.build_interactive_proof_from_specification,
inputs=[mcp_generate_code_concept_json, mcp_generate_code_api_key],
outputs=[mcp_generate_code_output]
)
# Register MCP prompts and resources as API endpoints so they appear in the server schema
# Each function's own docstring is reused as the endpoint description.
gr.api(
create_visual_math_proof,
api_name="create_visual_math_proof_prompt",
api_description=create_visual_math_proof.__doc__
)
gr.api(
create_math_specification,
api_name="create_math_specification_prompt",
api_description=create_math_specification.__doc__
)
gr.api(
build_from_specification,
api_name="build_from_specification_prompt",
api_description=build_from_specification.__doc__
)
gr.api(
get_specification_template,
api_name="specification_template_resource",
api_description=get_specification_template.__doc__
)
gr.api(
get_pythagorean_example,
api_name="example_pythagorean_resource",
api_description=get_pythagorean_example.__doc__
)
gr.api(
get_probability_example,
api_name="example_probability_resource",
api_description=get_probability_example.__doc__
)
gr.api(
get_workflow_guide,
api_name="workflow_guide_resource",
api_description=get_workflow_guide.__doc__
)
return demo
# ==================== Main Entry Point ====================
if __name__ == "__main__":
    try:
        # Build the Blocks UI before anything else.
        demo = create_gradio_app()

        # Theme configuration (Gradio 6 style): base palette first, then overrides.
        base_theme = gr.themes.Base(
            primary_hue="indigo",
            secondary_hue="purple",
            neutral_hue="slate",
            font=gr.themes.GoogleFont("Inter"),
        )
        theme = base_theme.set(
            body_background_fill="*neutral_50",
            body_background_fill_dark="*neutral_900",
            button_primary_background_fill="*primary_500",
            button_primary_background_fill_hover="*primary_600",
            button_primary_text_color="white",
        )

        # Launch with MCP server enabled
        launch_options = {
            "server_name": "0.0.0.0",
            "server_port": 7860,
            "mcp_server": True,
            "theme": theme,
            "debug": True,
            "show_error": True,
            "quiet": False,
        }
        demo.launch(**launch_options)
    except Exception as exc:
        # Log the failure plus a short troubleshooting checklist, then re-raise
        # so the process still exits with the original error.
        logger.error(f"Failed to start server: {exc}")
        for hint in (
            "Check that:",
            " 1. GEMINI_API_KEY environment variable is set",
            " 2. Port 7860 is available",
            " 3. All dependencies are installed",
        ):
            logger.error(hint)
        raise