# chatbot_gradio / app.py
# Author: datasciencesage — "Update app.py" (commit 3375af0, verified)
import gradio as gr
import os
import json
import shutil
from pathlib import Path
import base64
from openai import OpenAI
import re
from step1_get_images import get_images
# ============================================
# CONFIG
# ============================================
SCRIPT_DIR = Path(__file__).parent.resolve()
# Anchor all relative paths to the script's own directory, regardless of
# where the process was launched from.
os.chdir(SCRIPT_DIR)
UPLOAD_DIR = SCRIPT_DIR / "all_documents"  # raw uploaded PDF/DOCX files
IMAGES_DIR = SCRIPT_DIR / "images"         # rendered page PNGs (one per page)
TEMP_PDF_DIR = SCRIPT_DIR / "temp_pdfs"    # scratch dir — presumably DOCX→PDF output; confirm in step1_get_images
# Create all working directories up front so later glob/copy calls can't fail.
for d in [UPLOAD_DIR, IMAGES_DIR, TEMP_PDF_DIR]:
    d.mkdir(parents=True, exist_ok=True)
# ============================================
# GLOBAL STATE
# ============================================
class DocumentState:
    """Holds the currently loaded document's page images and the API client.

    Attributes:
        page_images: list of ``(page_number, image_path)`` tuples, 1-based.
        ready: True once at least one page image has been loaded.
        client: lazily constructed OpenAI client (None until first use).
    """

    def __init__(self):
        self.page_images = []
        self.ready = False
        self.client = None

    def load_images(self):
        """Scan IMAGES_DIR for page PNGs (name order) and return the count."""
        found = sorted(IMAGES_DIR.glob('*.png'), key=lambda p: p.name)
        self.page_images = [(num, str(path)) for num, path in enumerate(found, start=1)]
        self.ready = bool(self.page_images)
        return len(self.page_images)

    def clear(self):
        """Forget any previously loaded pages."""
        self.page_images = []
        self.ready = False

    def init_client(self):
        """Return the shared OpenAI client, creating it on first call.

        Raises:
            ValueError: if OPENAI_API_KEY is not set in the environment.
        """
        if self.client is None:
            key = os.environ.get("OPENAI_API_KEY")
            if not key:
                raise ValueError("OPENAI_API_KEY not set")
            self.client = OpenAI(api_key=key)
        return self.client


# Single module-level instance shared by all handlers.
state = DocumentState()
# ============================================
# HELPER FUNCTIONS
# ============================================
def encode_image(image_path):
    """Return the file at *image_path* as a base64-encoded ASCII string."""
    raw = Path(image_path).read_bytes()
    return base64.b64encode(raw).decode('utf-8')
def build_vision_content(message):
    """Build an OpenAI vision user-message: the text plus every loaded page.

    Each page image in ``state.page_images`` is embedded as a base64 data
    URL at "high" detail so question text stays legible to the model.
    """
    parts = [{"type": "text", "text": message}]
    for _page_num, img_path in state.page_images:
        encoded = encode_image(img_path)
        parts.append({
            "type": "image_url",
            "image_url": {
                "url": f"data:image/png;base64,{encoded}",
                "detail": "high",
            },
        })
    return parts
# ============================================
# PROCESSING
# ============================================
def process_documents(files):
    """Gradio handler (generator): ingest uploads, render pages, load state.

    Yields progress strings that Gradio streams into the status textbox.
    NOTE: because this function contains ``yield`` it is a generator, so a
    plain ``return "msg"`` would never reach the UI (the string becomes the
    StopIteration value) — every user-visible status, including errors,
    must be *yielded*.

    Args:
        files: list of Gradio temp-file objects (or None/empty).
    """
    if not files:
        # BUG FIX: the original ``return "❌ No files uploaded"`` inside a
        # generator silently discarded the message; yield it instead.
        yield "❌ No files uploaded"
        return
    try:
        # Clear any previous document's files and state.
        for f in UPLOAD_DIR.glob("*"):
            f.unlink(missing_ok=True)
        for f in IMAGES_DIR.glob("*"):
            f.unlink(missing_ok=True)
        state.clear()
        # Copy the uploaded temp files into our working directory.
        for f in files:
            shutil.copy(f.name, UPLOAD_DIR / Path(f.name).name)
        yield f"📤 Uploaded {len(files)} file(s)\n⚙️ Converting to images..."
        # Render every page of every document to PNG (see step1_get_images).
        get_images(str(UPLOAD_DIR), str(TEMP_PDF_DIR), str(IMAGES_DIR))
        img_count = len(list(IMAGES_DIR.glob('*.png')))
        if img_count == 0:
            yield "❌ No images extracted. Check file format."
            return
        yield f"✅ {img_count} pages converted\n⚙️ Analyzing document..."
        # Register the rendered pages in the shared state.
        state.load_images()
        yield f"""✅ Document loaded successfully!
📊 **Document Analysis:**
- Pages: {img_count}
- Format: High-resolution images (300 DPI)
- Ready for multi-level question generation
🎯 **What you can do:**
- "List all questions with their grade levels"
- "Generate 5 similar questions to question 3"
- "Create practice problems for question 8"
- "What is question 2?"
The system will:
✅ Auto-detect the grade level of each question
✅ Generate different scenarios (not just changed numbers)
✅ Maintain exact same difficulty level
"""
    except Exception as e:
        yield f"❌ Error: {str(e)}"
# ============================================
# QUESTION ANALYSIS
# ============================================
def analyze_question(question_number):
"""Analyze a specific question and detect its level"""
try:
client = state.init_client()
content = build_vision_content(
f"""Analyze question {question_number} in this document.
Provide:
1. The complete question text
2. Grade level (Grade 5, Grade 6, ..., Grade 12, or University)
3. Mathematical topics covered
4. Difficulty indicators (complexity, concepts required)
Return ONLY valid JSON:
{{
"question_number": {question_number},
"question_text": "full question here",
"grade_level": "Grade X or University",
"topics": ["topic1", "topic2"],
"difficulty_indicators": ["indicator1", "indicator2"],
"solution_steps_required": number
}}"""
)
response = client.chat.completions.create(
model="gpt-4o",
messages=[
{"role": "system", "content": "You are a math education expert who analyzes question difficulty. Return ONLY valid JSON."},
{"role": "user", "content": content}
],
temperature=0.1,
max_tokens=1500
)
result = response.choices[0].message.content.strip()
# Clean JSON
if result.startswith('```'):
result = result.split('```')[1]
if result.startswith('json'):
result = result[4:]
analysis = json.loads(result.strip())
return analysis
except Exception as e:
return {"error": str(e)}
# ============================================
# QUESTION GENERATION
# ============================================
def generate_similar_questions(question_number, count=3):
    """Generate `count` new practice questions similar to the given one.

    Two-step pipeline:
      1. analyze_question() detects the original's grade level and topics.
      2. GPT-4o is prompted (with all page images attached) to write new
         questions at that exact level but with different scenarios.

    Returns a Markdown string for the chat UI, or an "❌ ..." error string.
    """
    try:
        client = state.init_client()
        # Step 1: Analyze original question
        analysis = analyze_question(question_number)
        if "error" in analysis:
            return f"❌ Error analyzing question: {analysis['error']}"
        # Fall back to safe defaults if the analysis JSON misses a key.
        grade_level = analysis.get("grade_level", "Unknown")
        topics = analysis.get("topics", [])
        question_text = analysis.get("question_text", "")
        # Step 2: Generate similar questions with strict constraints
        content = build_vision_content(
            f"""You are an expert math educator. Generate {count} NEW practice questions.
ORIGINAL QUESTION #{question_number}:
{question_text}
DETECTED LEVEL: {grade_level}
TOPICS: {', '.join(topics)}
🎯 CRITICAL REQUIREMENTS:
1. **EXACT SAME GRADE LEVEL**: {grade_level}
- Use age-appropriate vocabulary
- Same mathematical concepts complexity
- Same prerequisite knowledge required
- Same number of solution steps
2. **TRULY DIFFERENT QUESTIONS** (not just number changes):
- Change the SCENARIO completely (different context/story)
- Change the OBJECTS involved (if Grade 5 uses apples, use books/toys/etc)
- Change the SETUP (different word problem structure)
- Change NUMBERS but keep same computational difficulty
- Change the QUESTION ASKED (but test same concepts)
3. **MAINTAIN DIFFICULTY**:
- Same level of calculation complexity
- Same types of operations required
- Same reasoning depth
- Same time to solve
4. **EXAMPLES OF GOOD VARIATION**:
❌ BAD: "John has 5 apples..." → "Mary has 7 apples..." (just changed numbers)
✅ GOOD: "John has 5 apples..." → "A library has 3 shelves with 4 books each. How many books total?"
5. **GRADE-SPECIFIC RULES**:
- Grade 5-6: Simple scenarios, basic operations, whole numbers
- Grade 7-8: Fractions, decimals, basic algebra, simple geometry
- Grade 9-10: Advanced algebra, quadratics, trigonometry basics
- Grade 11-12: Calculus, advanced functions, complex proofs
- University: Rigorous proofs, advanced calculus, abstract concepts
Return ONLY valid JSON array:
[
{{
"question_number": 1,
"question_text": "Complete new question with all details and context",
"grade_level_confirmed": "{grade_level}",
"variation_type": "describe what you changed from original",
"solution": {{
"steps": ["Step 1: explanation", "Step 2: calculation", "Step 3: final answer"],
"final_answer": "The answer with units"
}}
}}
]
Generate EXACTLY {count} questions. NO explanations outside JSON."""
        )
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "system",
                    "content": f"""You are a math question generator expert. You maintain EXACT grade levels.
STRICT RULES:
- NEVER increase difficulty beyond original
- NEVER decrease difficulty below original
- ALWAYS change scenario, not just numbers
- ALWAYS verify grade level matches: {grade_level}
- Return ONLY valid JSON array"""
                },
                {"role": "user", "content": content}
            ],
            temperature=0.8,  # Higher for creative variation
            max_tokens=4000
        )
        result = response.choices[0].message.content.strip()
        # Clean JSON: strip a leading markdown fence and optional "json" tag.
        if result.startswith('```'):
            result = result.split('```')[1]
            if result.startswith('json'):
                result = result[4:]
        generated = json.loads(result.strip())
        # Format output as Markdown for the Chatbot component.
        output = f"""### Generated {len(generated)} Similar Questions to Question #{question_number}
**Original Question:** {question_text[:200]}...
**Grade Level:** {grade_level}
**Topics:** {', '.join(topics)}
---
"""
        for i, q in enumerate(generated, 1):
            output += f"""### Similar Question {i}
**Question:**
{q['question_text']}
**What Changed:** {q.get('variation_type', 'Scenario variation')}
**Solution:**
"""
            # 'question_text' and 'solution' are required by the prompt's JSON
            # schema; a KeyError here is caught by the outer except below.
            for step in q['solution']['steps']:
                output += f"- {step}\n"
            output += f"\n**Final Answer:** {q['solution']['final_answer']}\n\n"
            output += "---\n\n"
        return output
    except Exception as e:
        return f"❌ Generation error: {str(e)}"
# ============================================
# CHATBOT
# ============================================
def chat(message, history):
    """Main chat handler.

    Routes generation-style requests ("generate/create/similar") to
    generate_similar_questions(); everything else goes to GPT-4o with the
    page images attached. Returns the updated history as a list of
    (user, bot) tuples — matches the Chatbot's type="tuples" format.
    """
    if not message or not message.strip():
        return history
    if not state.ready:
        # Lazily pick up page images (e.g. after a restart with images on disk).
        state.load_images()
        if not state.page_images:
            return history + [(message, "❌ Upload a document first")]
    msg_lower = message.lower()
    # Check for generation request
    if "generate" in msg_lower or "create" in msg_lower or "similar" in msg_lower:
        # Extract question number
        numbers = re.findall(r'\d+', message)
        if not numbers:
            return history + [(message, "Please specify question number. Example: 'generate 5 similar questions to question 3'")]
        # Heuristic parse: with the word "question" present, the LAST number
        # is the question id and the FIRST (if any extra) is the count, e.g.
        # "generate 5 similar questions to question 3" -> count=5, q_num=3.
        if "question" in msg_lower:
            q_num = int(numbers[-1])
            count = int(numbers[0]) if len(numbers) > 1 else 3
        else:
            q_num = int(numbers[0])
            count = int(numbers[1]) if len(numbers) > 1 else 3
        count = min(count, 10)  # Limit to 10
        response = generate_similar_questions(q_num, count)
        return history + [(message, response)]
    # General chat with vision
    try:
        client = state.init_client()
        content = build_vision_content(message)
        # Build conversation
        messages = [
            {
                "role": "system",
                "content": f"""You are a math education assistant with access to {len(state.page_images)} pages.
CAPABILITIES:
- List all questions with grade levels
- Show specific questions
- Explain solutions step-by-step
- Identify mathematical topics
Questions are numbered sequentially (1, 2, 3...) across all pages."""
            }
        ]
        # Add history (text only) — last 3 turns, keeps token usage bounded.
        for user_msg, bot_msg in history[-3:]:
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if bot_msg:
                messages.append({"role": "assistant", "content": bot_msg})
        # Add current message with the page images attached.
        messages.append({"role": "user", "content": content})
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            temperature=0.3,
            max_tokens=3000
        )
        answer = response.choices[0].message.content
        return history + [(message, answer)]
    except Exception as e:
        error_msg = f"❌ Error: {str(e)}"
        if "api_key" in str(e).lower():
            error_msg += "\n\nSet OPENAI_API_KEY: export OPENAI_API_KEY='sk-...'"
        return history + [(message, error_msg)]
# ============================================
# UI
# ============================================
# Gradio UI: left column = upload/processing, right column = chat.
with gr.Blocks(theme=gr.themes.Soft(), title="Multi-Level Math Question Generator") as demo:
    gr.Markdown("""
# 📚 Multi-Level Math Question Generator
### Auto-Detect Grade Levels & Generate Similar Questions (Grade 5 → University)
""")
    gr.Markdown("""
**🎯 What this does:**
- Automatically detects question difficulty (Grade 5, 6, 7... 12, University)
- Generates truly different questions (not just changed numbers!)
- Maintains exact same grade level and difficulty
- Works for elementary to university mathematics
**✅ Key Features:**
- Different scenarios (not "5 apples" → "7 apples")
- Same computational complexity
- Grade-appropriate vocabulary
- Complete solutions with steps
""")
    with gr.Row():
        # Left column: document upload + streamed processing status.
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload Document")
            files = gr.File(
                file_count="multiple",
                file_types=[".pdf", ".docx"],
                label="Upload PDF/DOCX"
            )
            process_btn = gr.Button("🚀 Process", variant="primary", size="lg")
            status = gr.Textbox(label="Status", lines=12, interactive=False)
            gr.Markdown("""
**📋 Supported:**
- Grade 5 to Grade 12
- University level
- Mixed difficulty documents
- 5-30 pages optimal
""")
        # Right column: the chat interface.
        with gr.Column(scale=2):
            gr.Markdown("### 💬 Chat Interface")
            chatbot = gr.Chatbot(
                height=550,
                type="tuples",  # (user, bot) pairs — chat() relies on this format
                value=[(None, """👋 **Welcome!**
Upload a math document to start.
**Example commands:**
• "List all questions"
• "What is question 5?"
• "Generate 5 similar questions to question 3"
• "Create practice problems for question 7"
I'll automatically detect grade levels and maintain difficulty!
""")]
            )
            msg = gr.Textbox(
                placeholder="Example: 'generate 5 similar questions to question 3'",
                lines=2,
                label="Your Message"
            )
            with gr.Row():
                send = gr.Button("📤 Send", variant="primary", scale=2)
                clear = gr.Button("🗑️ Clear", scale=1)
    # Clickable example prompts that fill the message box.
    gr.Markdown("### 📚 Example Commands")
    gr.Examples(
        examples=[
            ["List all questions with their grade levels"],
            ["What is question 1?"],
            ["Generate 5 similar questions to question 3"],
            ["Create 3 practice problems for question 7"],
            ["Generate similar questions to question 2"],
            ["What topics are covered in question 5?"],
        ],
        inputs=msg,
    )
    gr.Markdown("""
---
### 🎓 How It Works
1. **Upload**: Your PDF with math questions (any grade level)
2. **Auto-Detect**: AI identifies each question's grade level
3. **Generate**: Creates truly different questions maintaining:
   - Same grade level
   - Same topics/concepts
   - Same difficulty
   - Different scenarios (not just numbers!)
**Example:**
- **Original (Grade 5)**: "John has 5 apples and buys 3 more. How many total?"
- **Bad Generation**: "Mary has 7 apples and buys 2 more. How many total?" ❌
- **Good Generation**: "A toy box has 4 cars. Sarah adds 6 more cars. How many cars now?" ✅
The good version changes the scenario (toys vs apples) but keeps Grade 5 simple addition!
""")
    # Event handlers
    # process_documents is a generator, so status streams progressively.
    process_btn.click(process_documents, inputs=files, outputs=status)
    # After sending, clear the message box via the chained .then() callback.
    send.click(chat, inputs=[msg, chatbot], outputs=chatbot).then(lambda: "", outputs=msg)
    msg.submit(chat, inputs=[msg, chatbot], outputs=chatbot).then(lambda: "", outputs=msg)
    clear.click(lambda: [], outputs=chatbot)
if __name__ == "__main__":
    # Reuse page images left on disk by a previous run, if any.
    count = state.load_images()
    if count > 0:
        print(f"✅ Loaded {count} pages")
    print("🚀 Multi-Level Math Question Generator")
    print("📁 Upload PDF to begin")
    print("🌐 http://localhost:7860")
    # demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
    demo.launch(
        server_name="0.0.0.0",   # listen on all interfaces (container-friendly)
        server_port=7860,
        share=True,              # also create a public Gradio share link
        show_api=False,
        inbrowser=True
    )