Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import os | |
| import json | |
| import shutil | |
| from pathlib import Path | |
| import base64 | |
| from openai import OpenAI | |
| import re | |
| from step1_get_images import get_images | |
# ============================================
# CONFIG
# ============================================

# Anchor all paths to the directory containing this script so relative
# lookups behave the same no matter where the app is launched from.
SCRIPT_DIR = Path(__file__).parent.resolve()
os.chdir(SCRIPT_DIR)

# Working directories: raw uploads, rendered page images, PDF scratch space.
UPLOAD_DIR = SCRIPT_DIR / "all_documents"
IMAGES_DIR = SCRIPT_DIR / "images"
TEMP_PDF_DIR = SCRIPT_DIR / "temp_pdfs"

for directory in (UPLOAD_DIR, IMAGES_DIR, TEMP_PDF_DIR):
    directory.mkdir(parents=True, exist_ok=True)
# ============================================
# GLOBAL STATE
# ============================================
class DocumentState:
    """Holds the currently loaded document pages and a shared OpenAI client."""

    def __init__(self):
        self.page_images = []  # list of (page_number, image_path_str) tuples
        self.ready = False     # True once at least one page image is loaded
        self.client = None     # lazily created OpenAI client

    def load_images(self):
        """Load all page images in sequential order; return the page count."""
        # NOTE(review): sort is lexicographic by filename — assumes get_images
        # emits zero-padded page names; confirm with step1_get_images output.
        found = sorted(IMAGES_DIR.glob('*.png'), key=lambda p: p.name)
        self.page_images = [(num, str(path)) for num, path in enumerate(found, start=1)]
        self.ready = bool(self.page_images)
        return len(self.page_images)

    def clear(self):
        """Forget any previously loaded document."""
        self.page_images = []
        self.ready = False

    def init_client(self):
        """Create the OpenAI client on first use; requires OPENAI_API_KEY."""
        if self.client is None:
            key = os.environ.get("OPENAI_API_KEY")
            if not key:
                raise ValueError("OPENAI_API_KEY not set")
            self.client = OpenAI(api_key=key)
        return self.client


# Single module-wide state instance shared by all handlers.
state = DocumentState()
# ============================================
# HELPER FUNCTIONS
# ============================================
def encode_image(image_path):
    """Return the file at *image_path* as a base64 string (UTF-8 decoded)."""
    raw = Path(image_path).read_bytes()
    return base64.b64encode(raw).decode('utf-8')
def build_vision_content(message):
    """Build a multimodal user-message payload: the text plus every loaded page.

    Returns a list shaped for the OpenAI chat API: one text part followed by
    one image_url part per page in ``state.page_images``.
    """
    parts = [{"type": "text", "text": message}]
    for _page_num, img_path in state.page_images:
        parts.append({
            "type": "image_url",
            "image_url": {
                "url": f"data:image/png;base64,{encode_image(img_path)}",
                # high detail so the model can read small math notation
                "detail": "high",
            },
        })
    return parts
# ============================================
# PROCESSING
# ============================================
def process_documents(files):
    """Gradio generator handler: ingest uploads and render each page to PNG.

    Yields progress/status strings that Gradio streams into the status box.

    Args:
        files: uploaded file objects (each exposing a filesystem path via
            ``.name``), or None/empty when nothing was uploaded.
    """
    if not files:
        # BUG FIX: this function is a generator, so a bare ``return <value>``
        # raises StopIteration without emitting anything — the UI never saw
        # this message. Yield the status first, then stop.
        yield "β No files uploaded"
        return
    try:
        # Clear previous data so pages from an earlier document cannot leak in.
        for stale in UPLOAD_DIR.glob("*"):
            stale.unlink(missing_ok=True)
        for stale in IMAGES_DIR.glob("*"):
            stale.unlink(missing_ok=True)
        state.clear()
        # Copy uploads into the working directory under their original names.
        for f in files:
            shutil.copy(f.name, UPLOAD_DIR / Path(f.name).name)
        yield f"π€ Uploaded {len(files)} file(s)\nβοΈ Converting to images..."
        # Convert every document to per-page PNG images (DPI handled by
        # get_images — the 300 DPI figure below is the status text's claim).
        get_images(str(UPLOAD_DIR), str(TEMP_PDF_DIR), str(IMAGES_DIR))
        img_count = len(list(IMAGES_DIR.glob('*.png')))
        if img_count == 0:
            yield "β No images extracted. Check file format."
            return
        yield f"β {img_count} pages converted\nβοΈ Analyzing document..."
        # Register the rendered pages in the shared state.
        state.load_images()
        yield f"""β Document loaded successfully!
π **Document Analysis:**
- Pages: {img_count}
- Format: High-resolution images (300 DPI)
- Ready for multi-level question generation
π― **What you can do:**
- "List all questions with their grade levels"
- "Generate 5 similar questions to question 3"
- "Create practice problems for question 8"
- "What is question 2?"
The system will:
β Auto-detect the grade level of each question
β Generate different scenarios (not just changed numbers)
β Maintain exact same difficulty level
"""
    except Exception as e:
        # Surface any failure (conversion, filesystem) as a status message.
        yield f"β Error: {str(e)}"
# ============================================
# QUESTION ANALYSIS
# ============================================
def analyze_question(question_number):
    """Analyze one question with GPT-4o vision and detect its grade level.

    Returns a dict parsed from the model's JSON, or ``{"error": ...}`` on any
    failure — callers check for the "error" key instead of catching.
    """
    try:
        client = state.init_client()
        prompt = f"""Analyze question {question_number} in this document.
Provide:
1. The complete question text
2. Grade level (Grade 5, Grade 6, ..., Grade 12, or University)
3. Mathematical topics covered
4. Difficulty indicators (complexity, concepts required)
Return ONLY valid JSON:
{{
"question_number": {question_number},
"question_text": "full question here",
"grade_level": "Grade X or University",
"topics": ["topic1", "topic2"],
"difficulty_indicators": ["indicator1", "indicator2"],
"solution_steps_required": number
}}"""
        content = build_vision_content(prompt)
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a math education expert who analyzes question difficulty. Return ONLY valid JSON."},
                {"role": "user", "content": content},
            ],
            temperature=0.1,  # near-deterministic for analysis
            max_tokens=1500,
        )
        raw = response.choices[0].message.content.strip()
        # Strip a markdown code fence if the model wrapped its JSON in one.
        if raw.startswith('```'):
            raw = raw.split('```')[1]
            if raw.startswith('json'):
                raw = raw[4:]
        return json.loads(raw.strip())
    except Exception as e:
        return {"error": str(e)}
# ============================================
# QUESTION GENERATION
# ============================================
def generate_similar_questions(question_number, count=3):
    """Generate *count* new questions at the same grade level as the original.

    Two-step pipeline: analyze the original question to pin down its level and
    topics, then prompt GPT-4o (with the page images) for scenario-varied
    questions at that exact level. Returns formatted markdown, or an error
    string prefixed with the failure marker.
    """
    try:
        client = state.init_client()
        # Step 1: analyze the original question.
        analysis = analyze_question(question_number)
        if "error" in analysis:
            return f"β Error analyzing question: {analysis['error']}"
        grade_level = analysis.get("grade_level", "Unknown")
        topics = analysis.get("topics", [])
        question_text = analysis.get("question_text", "")
        # Step 2: generate new questions under strict level constraints.
        user_prompt = f"""You are an expert math educator. Generate {count} NEW practice questions.
ORIGINAL QUESTION #{question_number}:
{question_text}
DETECTED LEVEL: {grade_level}
TOPICS: {', '.join(topics)}
π― CRITICAL REQUIREMENTS:
1. **EXACT SAME GRADE LEVEL**: {grade_level}
- Use age-appropriate vocabulary
- Same mathematical concepts complexity
- Same prerequisite knowledge required
- Same number of solution steps
2. **TRULY DIFFERENT QUESTIONS** (not just number changes):
- Change the SCENARIO completely (different context/story)
- Change the OBJECTS involved (if Grade 5 uses apples, use books/toys/etc)
- Change the SETUP (different word problem structure)
- Change NUMBERS but keep same computational difficulty
- Change the QUESTION ASKED (but test same concepts)
3. **MAINTAIN DIFFICULTY**:
- Same level of calculation complexity
- Same types of operations required
- Same reasoning depth
- Same time to solve
4. **EXAMPLES OF GOOD VARIATION**:
β BAD: "John has 5 apples..." β "Mary has 7 apples..." (just changed numbers)
β GOOD: "John has 5 apples..." β "A library has 3 shelves with 4 books each. How many books total?"
5. **GRADE-SPECIFIC RULES**:
- Grade 5-6: Simple scenarios, basic operations, whole numbers
- Grade 7-8: Fractions, decimals, basic algebra, simple geometry
- Grade 9-10: Advanced algebra, quadratics, trigonometry basics
- Grade 11-12: Calculus, advanced functions, complex proofs
- University: Rigorous proofs, advanced calculus, abstract concepts
Return ONLY valid JSON array:
[
{{
"question_number": 1,
"question_text": "Complete new question with all details and context",
"grade_level_confirmed": "{grade_level}",
"variation_type": "describe what you changed from original",
"solution": {{
"steps": ["Step 1: explanation", "Step 2: calculation", "Step 3: final answer"],
"final_answer": "The answer with units"
}}
}}
]
Generate EXACTLY {count} questions. NO explanations outside JSON."""
        content = build_vision_content(user_prompt)
        system_prompt = f"""You are a math question generator expert. You maintain EXACT grade levels.
STRICT RULES:
- NEVER increase difficulty beyond original
- NEVER decrease difficulty below original
- ALWAYS change scenario, not just numbers
- ALWAYS verify grade level matches: {grade_level}
- Return ONLY valid JSON array"""
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": content},
            ],
            temperature=0.8,  # Higher for creative variation
            max_tokens=4000,
        )
        raw = response.choices[0].message.content.strip()
        # Strip a markdown code fence if present.
        if raw.startswith('```'):
            raw = raw.split('```')[1]
            if raw.startswith('json'):
                raw = raw[4:]
        questions = json.loads(raw.strip())
        # Step 3: render the generated questions as markdown.
        pieces = [f"""### Generated {len(questions)} Similar Questions to Question #{question_number}
**Original Question:** {question_text[:200]}...
**Grade Level:** {grade_level}
**Topics:** {', '.join(topics)}
---
"""]
        for i, q in enumerate(questions, 1):
            pieces.append(f"""### Similar Question {i}
**Question:**
{q['question_text']}
**What Changed:** {q.get('variation_type', 'Scenario variation')}
**Solution:**
""")
            for step in q['solution']['steps']:
                pieces.append(f"- {step}\n")
            pieces.append(f"\n**Final Answer:** {q['solution']['final_answer']}\n\n")
            pieces.append("---\n\n")
        return "".join(pieces)
    except Exception as e:
        return f"β Generation error: {str(e)}"
# ============================================
# CHATBOT
# ============================================
def chat(message, history):
    """Main chat handler.

    Routes explicit generation requests ("generate/create/similar ...") to
    generate_similar_questions; everything else goes to GPT-4o vision with
    all page images attached. Returns the updated history tuple list.
    """
    if not message or not message.strip():
        return history
    if not state.ready:
        state.load_images()
        if not state.page_images:
            return history + [(message, "β Upload a document first")]
    text = message.lower()
    # Route: generation keywords bypass the general chat path.
    if any(word in text for word in ("generate", "create", "similar")):
        digits = re.findall(r'\d+', message)
        if not digits:
            return history + [(message, "Please specify question number. Example: 'generate 5 similar questions to question 3'")]
        # Heuristic parse: with "question" in the text, the last number is the
        # question number and the first (if distinct) is the count.
        if "question" in text:
            target = int(digits[-1])
            how_many = int(digits[0]) if len(digits) > 1 else 3
        else:
            target = int(digits[0])
            how_many = int(digits[1]) if len(digits) > 1 else 3
        how_many = min(how_many, 10)  # cap the batch size at 10
        return history + [(message, generate_similar_questions(target, how_many))]
    # General chat with vision
    try:
        client = state.init_client()
        content = build_vision_content(message)
        convo = [
            {
                "role": "system",
                "content": f"""You are a math education assistant with access to {len(state.page_images)} pages.
CAPABILITIES:
- List all questions with grade levels
- Show specific questions
- Explain solutions step-by-step
- Identify mathematical topics
Questions are numbered sequentially (1, 2, 3...) across all pages.""",
            }
        ]
        # Replay the last three exchanges as plain text (no images).
        for user_msg, bot_msg in history[-3:]:
            if user_msg:
                convo.append({"role": "user", "content": user_msg})
            if bot_msg:
                convo.append({"role": "assistant", "content": bot_msg})
        # Current turn carries the full image payload.
        convo.append({"role": "user", "content": content})
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=convo,
            temperature=0.3,
            max_tokens=3000,
        )
        return history + [(message, response.choices[0].message.content)]
    except Exception as e:
        note = f"β Error: {str(e)}"
        if "api_key" in str(e).lower():
            note += "\n\nSet OPENAI_API_KEY: export OPENAI_API_KEY='sk-...'"
        return history + [(message, note)]
# ============================================
# UI
# ============================================
# NOTE: Gradio component creation order defines the on-screen layout, so the
# statements below are deliberately kept in this sequence.
with gr.Blocks(theme=gr.themes.Soft(), title="Multi-Level Math Question Generator") as demo:
    # Page header.
    gr.Markdown("""
# π Multi-Level Math Question Generator
### Auto-Detect Grade Levels & Generate Similar Questions (Grade 5 β University)
""")
    # Feature overview shown above the two-column layout.
    gr.Markdown("""
**π― What this does:**
- Automatically detects question difficulty (Grade 5, 6, 7... 12, University)
- Generates truly different questions (not just changed numbers!)
- Maintains exact same grade level and difficulty
- Works for elementary to university mathematics
**β Key Features:**
- Different scenarios (not "5 apples" β "7 apples")
- Same computational complexity
- Grade-appropriate vocabulary
- Complete solutions with steps
""")
    with gr.Row():
        # Left column: upload controls and processing status.
        with gr.Column(scale=1):
            gr.Markdown("### π€ Upload Document")
            files = gr.File(
                file_count="multiple",
                file_types=[".pdf", ".docx"],
                label="Upload PDF/DOCX"
            )
            process_btn = gr.Button("π Process", variant="primary", size="lg")
            # Status box receives the streamed yields from process_documents.
            status = gr.Textbox(label="Status", lines=12, interactive=False)
            gr.Markdown("""
**π Supported:**
- Grade 5 to Grade 12
- University level
- Mixed difficulty documents
- 5-30 pages optimal
""")
        # Right column: chat interface.
        with gr.Column(scale=2):
            gr.Markdown("### π¬ Chat Interface")
            # type="tuples" keeps the (user, bot) pair format used by chat().
            chatbot = gr.Chatbot(
                height=550,
                type="tuples",
                value=[(None, """π **Welcome!**
Upload a math document to start.
**Example commands:**
β’ "List all questions"
β’ "What is question 5?"
β’ "Generate 5 similar questions to question 3"
β’ "Create practice problems for question 7"
I'll automatically detect grade levels and maintain difficulty!
""")]
            )
            msg = gr.Textbox(
                placeholder="Example: 'generate 5 similar questions to question 3'",
                lines=2,
                label="Your Message"
            )
            with gr.Row():
                send = gr.Button("π€ Send", variant="primary", scale=2)
                clear = gr.Button("ποΈ Clear", scale=1)
    # Clickable example prompts that populate the message box.
    gr.Markdown("### π Example Commands")
    gr.Examples(
        examples=[
            ["List all questions with their grade levels"],
            ["What is question 1?"],
            ["Generate 5 similar questions to question 3"],
            ["Create 3 practice problems for question 7"],
            ["Generate similar questions to question 2"],
            ["What topics are covered in question 5?"],
        ],
        inputs=msg,
    )
    gr.Markdown("""
---
### π How It Works
1. **Upload**: Your PDF with math questions (any grade level)
2. **Auto-Detect**: AI identifies each question's grade level
3. **Generate**: Creates truly different questions maintaining:
- Same grade level
- Same topics/concepts
- Same difficulty
- Different scenarios (not just numbers!)
**Example:**
- **Original (Grade 5)**: "John has 5 apples and buys 3 more. How many total?"
- **Bad Generation**: "Mary has 7 apples and buys 2 more. How many total?" β
- **Good Generation**: "A toy box has 4 cars. Sarah adds 6 more cars. How many cars now?" β
The good version changes the scenario (toys vs apples) but keeps Grade 5 simple addition!
""")
    # Event handlers
    # process_documents is a generator, so the status box streams its yields.
    process_btn.click(process_documents, inputs=files, outputs=status)
    # Send (button or Enter) appends to the chat, then clears the textbox.
    send.click(chat, inputs=[msg, chatbot], outputs=chatbot).then(lambda: "", outputs=msg)
    msg.submit(chat, inputs=[msg, chatbot], outputs=chatbot).then(lambda: "", outputs=msg)
    # Clear resets the chat history to an empty list.
    clear.click(lambda: [], outputs=chatbot)
if __name__ == "__main__":
    # Pick up any images left over from a previous run so the chat works
    # immediately after a restart.
    page_count = state.load_images()
    if page_count > 0:
        print(f"β Loaded {page_count} pages")
    print("π Multi-Level Math Question Generator")
    print("π Upload PDF to begin")
    print("π http://localhost:7860")
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        show_api=False,
        inbrowser=True,
    )