# gradio_edu_app_fixed.py
"""
Educational Text Tutor – Gradio App (Patched)
Fixes:
- Properly updates CheckboxGroup choices using gr.update(...)
- Dataframes use type="array" to ensure list-of-lists I/O
- Robust _apply_edits() to handle empty/short rows and parse errors
- Safer student answer table parsing
Enhancements:
- Personalized Study Summary per student on Analysis & Homework tab
- Profile-aware student simulation with targeted accuracy by subtopic category
Run:
pip install gradio openai
python gradio_edu_app_fixed.py
"""
import json
import uuid
import re
import random
from typing import List, Dict, Any, Tuple
import gradio as gr
# --- Utility: OpenAI call helper ------------------------------------------------
def _call_openai_chat(
api_key: str,
model: str,
messages: List[Dict[str, str]],
temperature: float = 0.2,
max_tokens: int = 2000,
) -> str:
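    """Send a chat request, preferring the Chat Completions API, falling back
    to the Responses API, then to the legacy openai<1.0 interface."""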
try:
from openai import OpenAI
client = OpenAI(api_key=api_key)
try:
resp = client.chat.completions.create(
model=model,
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
)
return resp.choices[0].message.content
except Exception:
# Fallback to Responses API
joined = "\n".join([f"{m['role'].upper()}: {m['content']}" for m in messages])
resp = client.responses.create(
model=model,
input=joined,
temperature=temperature,
max_output_tokens=max_tokens,
)
if hasattr(resp, "output_text"):
return resp.output_text
try:
return resp.choices[0].message.content # type: ignore[attr-defined]
except Exception:
return str(resp)
except ImportError:
import openai # type: ignore
openai.api_key = api_key
resp = openai.ChatCompletion.create( # type: ignore
model=model,
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
)
return resp["choices"][0]["message"]["content"]
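# Usage sketch (the key and model name below are placeholders; any
# chat-capable model should work):
#   reply = _call_openai_chat("sk-...", "gpt-4o-mini",
#                             [{"role": "user", "content": "Say hello."}])
#   print(reply)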
# --- Prompt templates (ALL literal braces escaped) ------------------------------
SUBTOPIC_PROMPT = """You are a curriculum designer.
Extract at least {min_subtopics} clear, non-overlapping subtopics from the EDUCATIONAL TEXT below.
Each subtopic should be concise (3–8 words), and together the subtopics should cover the main ideas.
Return ONLY valid JSON of the form:
{{
"subtopics": ["...", "...", "..."]
}}
EDUCATIONAL TEXT:
---
{source_text}
---
"""
QUESTION_PROMPT = """You are an assessment designer.
Create {n_per_subtopic} {qtype_desc} questions for EACH subtopic provided.
Vary difficulty around {difficulty} difficulty. Keep questions unambiguous and self-contained.
If question_type == "MCQ": provide *exactly four* options ("A","B","C","D") and the correct_key as one of "A"/"B"/"C"/"D".
If question_type == "Short Answer": provide a model_answer that is 1–3 sentences.
Return ONLY valid JSON in the following schema:
{{
"items": [
{{
"subtopic": "String",
"question_type": "{qtype}",
"question": "String",
"options": {{"A": "String", "B": "String", "C": "String", "D": "String"}} OR null,
"correct_key": "A|B|C|D" OR null,
"model_answer": "String" OR null
}},
...
]
}}
SUBTOPICS (the generator must cover these and label each item with the matching subtopic):
{selected_subtopics}
"""
# policy-aware simulation prompt (subtopic-aware)
SIMULATE_STUDENT_PROMPT = """You will roleplay as a student with this profile:
---
{student_profile}
---
**Policy (you MUST follow):**
{policy_json}
Guidelines:
- Use the **subtopic** of each question to decide where to excel vs. struggle.
- Hit the target accuracy ranges by category (strong/weak/neutral). If needed, deliberately pick a plausible but wrong choice. Never admit you're doing this.
- MCQ: answer ONLY the option key (A/B/C/D). Short Answer: 1–3 sentences; on weak areas, it's fine to be vague, omit a key detail, or introduce a misconception.
Return ONLY valid JSON:
{{
"answers": [
{{"id": "QUESTION_ID", "answer": "String"}},
...
]
}}
QUESTIONS (with IDs & subtopics):
{questions_json}
"""
GRADING_PROMPT = """You are a strict teacher using a clear rubric.
Grade each student answer against the provided key/model answer.
For MCQ: mark correct if the chosen key matches the correct_key.
For Short Answer: mark correct if the essential facts match (allow paraphrase), else incorrect.
Give a one-sentence rationale.
Return ONLY valid JSON with this schema:
{{
"results": [
{{
"id": "QUESTION_ID",
"subtopic": "String",
"is_correct": true/false,
"score": 1 or 0,
"rationale": "String"
}},
...
],
"by_subtopic": [
{{
"subtopic": "String",
"total": N,
"correct": M,
"accuracy": 0.0_to_1.0
}},
...
]
}}
QUESTIONS (with answers):
{questions_and_keys_json}
STUDENT ANSWERS:
{student_answers_json}
"""
PRESCRIPTION_PROMPT = """You are an expert tutor.
Based on the per-subtopic performance for two students, write:
1) A concise progress recap for each student (3–5 sentences).
2) A prioritized list of weak subtopics for each student (up to 5).
3) For each weak subtopic and each student, suggest a mini-homework plan: 3 concrete practice tasks (in increasing difficulty).
Return ONLY valid JSON:
{{
"student_1": {{
"recap": "String",
"weak_subtopics": ["..."],
"homework": [{{"subtopic":"String","tasks":["...","...","..."]}}]
}},
"student_2": {{
"recap": "String",
"weak_subtopics": ["..."],
"homework": [{{"subtopic":"String","tasks":["...","...","..."]}}]
}}
}}
PERFORMANCE SUMMARY (Student 1):
{perf_1_json}
PERFORMANCE SUMMARY (Student 2):
{perf_2_json}
"""
# Personalized study summary prompt
STUDY_SUMMARY_PROMPT = """You are a learning coach. Using the performance summary and the proposed homework for ONE student, write a short **personalized home-study summary** they can follow on their own.
Include, in order:
- **Strengths:** 2–3 quick bullets.
- **Weak spots:** 2–3 bullets naming subtopics (lowest accuracy first).
- **3 study goals** (clear, measurable).
- **7-day micro-plan:** Day 1 → Day 7 bullets (one action each).
- **Motivation tip** (1 sentence).
Constraints:
- Keep it concise: 120–180 words total.
- Use simple language and Markdown bullets.
- Do not mention accuracy numbers; just reflect them implicitly.
PERFORMANCE:
{perf_json}
HOMEWORK (may be empty):
{hw_json}
"""
# --- Core logic -----------------------------------------------------------------
def extract_subtopics(api_key: str, model: str, text: str, min_subtopics: int) -> List[str]:
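    """Ask the model for subtopics and return a deduplicated list.
    If fewer than `min_subtopics` come back, generic placeholders are
    appended; if the JSON cannot be parsed, each non-empty line of the
    raw reply is used instead."""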
if not api_key or not model:
raise gr.Error("Please enter your API key and select a model on the Setup tab.")
if not text.strip():
raise gr.Error("Please paste the educational text.")
msg = [
{"role": "system", "content": "You produce strictly valid JSON."},
{"role": "user", "content": SUBTOPIC_PROMPT.format(min_subtopics=min_subtopics, source_text=text.strip())},
]
raw = _call_openai_chat(api_key, model, msg, temperature=0.1)
try:
data = json.loads(raw)
subs = data.get("subtopics", [])
subs = [s.strip() for s in subs if isinstance(s, str) and s.strip()]
if len(subs) < min_subtopics:
extra_needed = min_subtopics - len(subs)
subs += [f"Additional Subtopic {i+1}" for i in range(extra_needed)]
seen, uniq = set(), []
for s in subs:
key = s.lower()
if key not in seen:
uniq.append(s)
seen.add(key)
return uniq
except Exception:
        # Fallback: treat each non-empty line of the reply as a subtopic.
        lines = [ln.strip("-• \t") for ln in raw.splitlines() if ln.strip()]
        return lines
def generate_questions(
api_key: str,
model: str,
selected_subtopics: List[str],
qtype: str,
n_per_subtopic: int,
difficulty: str
) -> List[Dict[str, Any]]:
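    """Generate questions for the selected subtopics and validate each item:
    MCQs must carry an options dict and a correct_key in A–D; short-answer
    items must carry a model_answer. Invalid items are dropped."""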
if not selected_subtopics:
raise gr.Error("Please select at least one subtopic in the Subtopics tab.")
qtype_desc = "multiple-choice (MCQ with 4 options)" if qtype == "MCQ" else "short-answer"
prompt = QUESTION_PROMPT.format(
n_per_subtopic=n_per_subtopic,
qtype_desc=qtype_desc,
difficulty=difficulty,
qtype=qtype,
selected_subtopics=json.dumps(selected_subtopics, ensure_ascii=False, indent=2),
)
msg = [
{"role": "system", "content": "You produce strictly valid JSON and follow the schema exactly."},
{"role": "user", "content": prompt},
]
raw = _call_openai_chat(api_key, model, msg, temperature=0.7, max_tokens=2800)
try:
data = json.loads(raw)
items = data.get("items", [])
except Exception:
raise gr.Error("The model did not return valid JSON for questions. Try again or reduce counts.")
questions: List[Dict[str, Any]] = []
for it in items:
qid = str(uuid.uuid4())
subtopic = (it.get("subtopic") or "").strip()
question_type = it.get("question_type") or qtype
question = (it.get("question") or "").strip()
options = it.get("options") or None
correct_key = it.get("correct_key") or None
model_answer = it.get("model_answer") or None
if question_type == "MCQ":
if not (isinstance(options, dict) and correct_key in {"A", "B", "C", "D"}):
continue
else:
if not model_answer:
continue
questions.append({
"id": qid,
"subtopic": subtopic,
"question_type": question_type,
"question": question,
"options": options,
"correct_key": correct_key,
"model_answer": model_answer,
})
return questions
# --- Policy helpers to force visible divergence between students ----------------
def _derive_policy(student_profile: str) -> Dict[str, Any]:
"""Infer strong/weak areas and target accuracies from a free-form profile."""
p = student_profile.lower()
strong_terms, weak_terms = set(), set()
# Heuristics from profile
if re.search(r"strong in (definitions?|theor(?:y|ies)|concepts?)", p):
strong_terms |= {"definition", "definitions", "theory", "theories", "concept", "concepts", "term", "terms"}
if re.search(r"weak(?:er)? in (definitions?|theor(?:y|ies)|concepts?)", p):
weak_terms |= {"definition", "definitions", "theory", "theories", "concept", "concepts", "term", "terms"}
if re.search(r"strong in (applications?|problem ?solving|calculations?)", p):
strong_terms |= {"application", "applications", "problem", "problems", "problem solving", "case", "cases", "calculation", "calculations", "practice"}
if re.search(r"weak(?:er)? in (applications?|problem ?solving|calculations?)", p):
weak_terms |= {"application", "applications", "problem", "problems", "problem solving", "case", "cases", "calculation", "calculations", "practice"}
# Generic defaults if not mentioned
if not strong_terms and "theor" in p:
strong_terms |= {"definition","concept","theory","term"}
if not weak_terms and "careless" in p:
weak_terms |= {"definition","term"} # careless β†’ slips on definitional precision
# Accuracy targets
overall = 0.65 # baseline realism
if "anxious" in p: overall -= 0.05
if "confident" in p: overall += 0.05
weak_acc = 0.45
strong_acc = 0.85
neutral_acc = overall
careless_rate = 0.15 if "careless" in p else 0.05
variance = 0.05 # small randomness
return {
"strong_terms": sorted(strong_terms),
"weak_terms": sorted(weak_terms),
"target_acc": {
"strong": strong_acc,
"weak": weak_acc,
"neutral": neutral_acc
},
"overall_target": overall,
"careless_rate": careless_rate,
"variance": variance
}
def _classify_subtopic(name: str, policy: Dict[str, Any]) -> str:
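    """Label a subtopic "strong", "weak", or "neutral" by keyword match
    against the policy's term lists (ties fall back to "neutral")."""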
s = (name or "").lower()
strong_hits = any(t in s for t in policy["strong_terms"])
weak_hits = any(t in s for t in policy["weak_terms"])
if weak_hits and not strong_hits:
return "weak"
if strong_hits and not weak_hits:
return "strong"
return "neutral"
def _wrong_option_letter(correct_key: str) -> str:
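    """Pick a random option letter other than the correct one."""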
pool = ["A","B","C","D"]
pool = [x for x in pool if x != (correct_key or "").upper()]
return random.choice(pool) if pool else "A"
def _enforce_profile_variation(
questions: List[Dict[str, Any]],
answers: List[Dict[str, Any]],
policy: Dict[str, Any]
) -> List[Dict[str, Any]]:
"""Post-process MCQ answers to meet target wrong-rate per category. Short answers untouched."""
# Indexing
q_by_id = {q["id"]: q for q in questions}
ans_by_id = {a["id"]: a["answer"] for a in answers}
# Collect MCQs per category
buckets = {"strong": [], "weak": [], "neutral": []}
for q in questions:
if q.get("question_type") != "MCQ":
continue
cat = _classify_subtopic(q.get("subtopic",""), policy)
buckets[cat].append(q["id"])
# For each category, compute current and target wrong counts
for cat, qids in buckets.items():
if not qids:
continue
target_acc = policy["target_acc"][cat]
# add small variance so runs don't look identical
target_acc += random.uniform(-policy["variance"], policy["variance"])
target_acc = max(0.2, min(0.95, target_acc))
total = len(qids)
desired_wrong = round(total * (1 - target_acc))
# Compute current wrongs
current_wrong = 0
        correct_candidates = []  # qids currently correct → can flip to wrong if needed
for qid in qids:
q = q_by_id[qid]
stu = (ans_by_id.get(qid) or "").strip().upper()
correct = (q.get("correct_key") or "").strip().upper()
if stu and correct and stu == correct:
correct_candidates.append(qid)
else:
current_wrong += 1
need_more_wrong = max(0, desired_wrong - current_wrong)
# Flip some correct ones to wrong
if need_more_wrong > 0 and correct_candidates:
random.shuffle(correct_candidates)
for qid in correct_candidates[:need_more_wrong]:
correct = (q_by_id[qid].get("correct_key") or "").strip().upper()
ans_by_id[qid] = _wrong_option_letter(correct)
        # Optional: occasionally flip one already-correct answer in this
        # category as a careless slip
        if random.random() < policy["careless_rate"]:
            qid = random.choice(qids)
            correct = (q_by_id[qid].get("correct_key") or "").strip().upper()
            if ans_by_id.get(qid, "").upper() == correct:
                ans_by_id[qid] = _wrong_option_letter(correct)
# Rebuild answers list
out = []
for a in answers:
qid = a["id"]
out.append({"id": qid, "answer": ans_by_id.get(qid, a["answer"])})
return out
def simulate_student_answers(
api_key: str,
model: str,
student_profile: str,
questions: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
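    """Roleplay the student via the LLM, then post-process the MCQ answers
    so per-category accuracy matches the profile-derived policy."""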
# Pack questions with subtopics so the model can bias performance
qpack = [
{
"id": q["id"],
"subtopic": q["subtopic"],
"question_type": q["question_type"],
"question": q["question"],
"options": q["options"],
} for q in questions
]
# Derive an explicit policy from the free-text profile
policy = _derive_policy(student_profile)
prompt = SIMULATE_STUDENT_PROMPT.format(
student_profile=student_profile.strip(),
policy_json=json.dumps(policy, ensure_ascii=False, indent=2),
questions_json=json.dumps(qpack, ensure_ascii=False, indent=2),
)
msg = [
{"role": "system", "content": "Return strictly valid JSON and keep answers realistic given the policy."},
{"role": "user", "content": prompt},
]
raw = _call_openai_chat(api_key, model, msg, temperature=0.8, max_tokens=3000)
try:
data = json.loads(raw)
answers = data.get("answers", [])
except Exception:
raise gr.Error("Failed to parse student answers JSON.")
# Normalize
normalized = []
for a in answers:
qid = a.get("id")
ans = (a.get("answer") or "").strip()
if qid and ans:
normalized.append({"id": qid, "answer": ans})
# Keep only answers for our questions
q_ids = {q["id"] for q in questions}
filtered = [a for a in normalized if a["id"] in q_ids]
# Enforce target variation to visibly differentiate students (MCQ-safe)
filtered = _enforce_profile_variation(questions, filtered, policy)
return filtered
def grade_student(
api_key: str,
model: str,
questions: List[Dict[str, Any]],
student_answers: List[Dict[str, Any]],
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
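    """Grade answers with the LLM; on a JSON parse failure, fall back to
    exact-key matching for MCQs and tally per-subtopic accuracy locally."""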
q_map = {q["id"]: q for q in questions}
bundle = []
for sa in student_answers:
qid = sa["id"]
if qid in q_map:
q = q_map[qid]
bundle.append({
"id": qid,
"subtopic": q["subtopic"],
"question_type": q["question_type"],
"question": q["question"],
"options": q["options"],
"correct_key": q.get("correct_key"),
"model_answer": q.get("model_answer"),
"student_answer": sa["answer"],
})
prompt = GRADING_PROMPT.format(
questions_and_keys_json=json.dumps(bundle, ensure_ascii=False, indent=2),
student_answers_json=json.dumps(student_answers, ensure_ascii=False, indent=2),
)
msg = [
{"role": "system", "content": "Return strictly valid JSON following the schema."},
{"role": "user", "content": prompt},
]
raw = _call_openai_chat(api_key, model, msg, temperature=0.0, max_tokens=3500)
try:
data = json.loads(raw)
results = data.get("results", [])
by_subtopic = data.get("by_subtopic", [])
for r in results:
r.setdefault("score", 1 if r.get("is_correct") else 0)
return results, by_subtopic
except Exception:
# Heuristic fallback (MCQ only)
results = []
tally = {}
for b in bundle:
is_correct = False
if b["question_type"] == "MCQ":
is_correct = (b["student_answer"].strip().upper() == (b.get("correct_key") or "").upper())
score = 1 if is_correct else 0
results.append({"id": b["id"], "subtopic": b["subtopic"], "is_correct": is_correct, "score": score, "rationale": "Heuristic fallback."})
t = tally.setdefault(b["subtopic"], {"subtopic": b["subtopic"], "total": 0, "correct": 0, "accuracy": 0.0})
t["total"] += 1
t["correct"] += score
for t in tally.values():
t["accuracy"] = round(t["correct"] / max(1, t["total"]), 3)
by_subtopic = list(tally.values())
return results, by_subtopic
def prescribe_homework(
api_key: str,
model: str,
perf1: List[Dict[str, Any]],
perf2: List[Dict[str, Any]],
) -> Dict[str, Any]:
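    """Ask the model for recaps, weak subtopics, and homework plans for both
    students; returns an empty-shell dict if the JSON cannot be parsed."""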
prompt = PRESCRIPTION_PROMPT.format(
perf_1_json=json.dumps(perf1, ensure_ascii=False, indent=2),
perf_2_json=json.dumps(perf2, ensure_ascii=False, indent=2),
)
msg = [
{"role": "system", "content": "Return strictly valid JSON exactly as requested."},
{"role": "user", "content": prompt},
]
raw = _call_openai_chat(api_key, model, msg, temperature=0.4, max_tokens=2200)
try:
data = json.loads(raw)
return data
except Exception:
return {
"student_1": {"recap": "N/A", "weak_subtopics": [], "homework": []},
"student_2": {"recap": "N/A", "weak_subtopics": [], "homework": []},
}
# Personalized study summary helper
def summarize_student(
api_key: str,
model: str,
perf: List[Dict[str, Any]],
rx_student: Dict[str, Any],
) -> str:
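    """Produce a short Markdown home-study summary for one student from the
    per-subtopic performance and (optionally) the prescribed homework."""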
prompt = STUDY_SUMMARY_PROMPT.format(
perf_json=json.dumps(perf, ensure_ascii=False, indent=2),
hw_json=json.dumps(rx_student or {}, ensure_ascii=False, indent=2),
)
msg = [
{"role": "system", "content": "Write concise Markdown only (no JSON, no preamble). Max ~180 words."},
{"role": "user", "content": prompt},
]
text = _call_openai_chat(api_key, model, msg, temperature=0.3, max_tokens=500)
return text.strip()
# --- Gradio UI ------------------------------------------------------------------
with gr.Blocks(css="footer {visibility: hidden}") as demo:
gr.Markdown("# πŸŽ“ Educational Tutor\nDesign subtopics β†’ generate questions β†’ simulate students β†’ analyze β†’ prescribe homework")
# App-wide state
st_api_key = gr.State("")
st_model = gr.State("gpt-4o-mini")
st_source_text = gr.State("")
st_subtopics = gr.State([]) # List[str]
st_selected_subtopics = gr.State([]) # List[str]
st_questions = gr.State([]) # List[dict]
st_student1_answers = gr.State([]) # List[dict]
st_student2_answers = gr.State([]) # List[dict]
st_grade1 = gr.State([]) # List[dict] results
st_grade2 = gr.State([])
st_perf1 = gr.State([]) # by_subtopic
st_perf2 = gr.State([])
    st_rx = gr.State({})  # prescriptions (currently unused; homework goes straight to the JSON outputs)
with gr.Tab("1) Setup"):
with gr.Row():
api_key_in = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")
model_in = gr.Dropdown(
label="Model",
choices=[
"gpt-4o-mini",
"gpt-4o",
"o4-mini",
"gpt-4.1-mini",
"gpt-4.1",
"gpt-3.5-turbo",
"gpt-4-turbo",
],
value="gpt-4o-mini",
allow_custom_value=True,
)
save_btn = gr.Button("Save Settings", variant="primary")
status = gr.Markdown("")
def _save_settings(api_key, model):
if not api_key or not model:
raise gr.Error("Please provide API key and a model.")
            return api_key, model, f"✅ Settings saved: **{model}**"
save_btn.click(
_save_settings,
inputs=[api_key_in, model_in],
outputs=[st_api_key, st_model, status],
)
with gr.Tab("2) Subtopics"):
source_text = gr.Textbox(lines=12, label="Paste Educational Text", placeholder="Paste the text students will learn...")
min_sub = gr.Slider(2, 20, value=5, step=1, label="Minimum number of subtopics")
extract_btn = gr.Button("Extract Subtopics", variant="primary")
subs_out = gr.CheckboxGroup(label="Select subtopics to include", choices=[])
def _extract(api_key, model, text, min_n):
subs = extract_subtopics(api_key, model, text, int(min_n))
return (
text,
subs,
gr.update(choices=subs, value=subs)
)
extract_btn.click(
_extract,
inputs=[st_api_key, st_model, source_text, min_sub],
outputs=[st_source_text, st_subtopics, subs_out],
)
def _select_subs(selected, available):
if not available:
return []
safe = [s for s in (selected or []) if s in available]
return safe
subs_out.change(
_select_subs,
inputs=[subs_out, st_subtopics],
outputs=st_selected_subtopics
)
with gr.Tab("3) Generate Questions"):
with gr.Row():
qtype = gr.Radio(["Short Answer", "MCQ"], value="MCQ", label="Question Type")
n_per_sub = gr.Slider(1, 10, value=3, step=1, label="Questions per selected subtopic")
difficulty = gr.Dropdown(["easy", "medium", "hard"], value="medium", label="Difficulty")
gen_btn = gr.Button("Generate Questions", variant="primary")
q_table = gr.Dataframe(
headers=["id","subtopic","question_type","question","options","correct_key","model_answer"],
row_count=(1, "dynamic"),
type="array",
label="Generated Questions"
)
hint = gr.Markdown("You can edit cells. For MCQ 'options', keep valid JSON, e.g. {\"A\":\"...\",\"B\":\"...\",\"C\":\"...\",\"D\":\"...\"}")
def _gen_q(api_key, model, selected, qtype_value, n, diff):
qtype_norm = "MCQ" if qtype_value == "MCQ" else "Short Answer"
qs = generate_questions(api_key, model, selected or [], qtype_norm, int(n), diff)
rows = []
for q in qs:
rows.append([
q.get("id"),
q.get("subtopic"),
q.get("question_type"),
q.get("question"),
json.dumps(q.get("options"), ensure_ascii=False) if q.get("options") else None,
q.get("correct_key"),
q.get("model_answer"),
])
return qs, rows
gen_btn.click(
_gen_q,
inputs=[st_api_key, st_model, st_selected_subtopics, qtype, n_per_sub, difficulty],
outputs=[st_questions, q_table],
)
def _apply_edits(df):
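            """Rebuild the question dicts from edited dataframe rows, padding
            short rows and tolerating malformed 'options' JSON."""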
qs = []
if not isinstance(df, list):
return qs
for row in df:
if not row:
continue
row = list(row) + [None] * (7 - len(row))
row = row[:7]
qid, subtopic, qtype_v, question, options_raw, correct_key, model_answer = row
if not (qid and question):
continue
options = None
if isinstance(options_raw, str) and options_raw.strip():
try:
parsed = json.loads(options_raw)
if isinstance(parsed, dict):
options = parsed
except Exception:
options = None
elif isinstance(options_raw, dict):
options = options_raw
qs.append({
"id": qid,
"subtopic": subtopic,
"question_type": qtype_v,
"question": question,
"options": options,
"correct_key": correct_key,
"model_answer": model_answer,
})
return qs
q_table.change(_apply_edits, inputs=q_table, outputs=st_questions)
with gr.Tab("4) Simulate Students"):
gr.Markdown("Provide brief profiles. The model will answer as each persona.")
s1 = gr.Textbox(label="Student 1 Profile", value="Diligent but anxious test-taker. Strong in theory, weaker in applications.")
s2 = gr.Textbox(label="Student 2 Profile", value="Confident and fast, sometimes careless. Strong in applications, weaker in definitions.")
sim_btn = gr.Button("Simulate Answers", variant="primary")
s1_table = gr.Dataframe(headers=["question_id","answer"], row_count=(1, "dynamic"), type="array", label="Student 1 Answers (editable)")
s2_table = gr.Dataframe(headers=["question_id","answer"], row_count=(1, "dynamic"), type="array", label="Student 2 Answers (editable)")
def _simulate(api_key, model, prof1, prof2, qs):
if not qs:
raise gr.Error("No questions generated yet.")
a1 = simulate_student_answers(api_key, model, prof1, qs)
a2 = simulate_student_answers(api_key, model, prof2, qs)
rows1 = [[x["id"], x["answer"]] for x in a1]
rows2 = [[x["id"], x["answer"]] for x in a2]
return a1, a2, rows1, rows2
sim_btn.click(
_simulate,
inputs=[st_api_key, st_model, s1, s2, st_questions],
outputs=[st_student1_answers, st_student2_answers, s1_table, s2_table],
)
def _apply_s_answers(df):
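            """Parse edited answer rows back into [{"id", "answer"}, ...],
            skipping empty or too-short rows."""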
out = []
if not isinstance(df, list):
return out
for r in df:
if not r or len(r) < 2:
continue
qid = r[0]
ans = r[1]
if qid and ans is not None:
out.append({"id": qid, "answer": str(ans)})
return out
s1_table.change(_apply_s_answers, inputs=s1_table, outputs=st_student1_answers)
s2_table.change(_apply_s_answers, inputs=s2_table, outputs=st_student2_answers)
with gr.Tab("5) Analysis & Homework"):
grade_btn = gr.Button("Grade & Analyze", variant="primary")
with gr.Row():
            perf1_tbl = gr.Dataframe(headers=["subtopic","total","correct","accuracy"], row_count=(1, "dynamic"), type="array", label="Student 1 – Per-Subtopic Performance")
            perf2_tbl = gr.Dataframe(headers=["subtopic","total","correct","accuracy"], row_count=(1, "dynamic"), type="array", label="Student 2 – Per-Subtopic Performance")
report_md = gr.Markdown()
        hw1 = gr.JSON(label="Student 1 – Homework Plan")
        hw2 = gr.JSON(label="Student 2 – Homework Plan")
# Personalized study summaries
gr.Markdown("### Student 1 – Personalized Study Summary")
sum1_md = gr.Markdown()
gr.Markdown("### Student 2 – Personalized Study Summary")
sum2_md = gr.Markdown()
def _grade_and_analyze(api_key, model, qs, a1, a2):
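            """Grade both students, build the per-subtopic tables, prescribe
            homework, and generate the personalized study summaries."""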
if not qs or not a1 or not a2:
raise gr.Error("Need questions and both students' answers first.")
res1, by1 = grade_student(api_key, model, qs, a1)
res2, by2 = grade_student(api_key, model, qs, a2)
table1 = [[b["subtopic"], b["total"], b["correct"], b["accuracy"]] for b in by1]
table2 = [[b["subtopic"], b["total"], b["correct"], b["accuracy"]] for b in by2]
def _acc(by):
if not by: return 0.0
num = sum(b.get("correct", 0) for b in by)
den = sum(b.get("total", 0) for b in by)
return round(num / max(1, den), 3)
rx_json = prescribe_homework(api_key, model, by1, by2)
s = f"**Student 1 overall accuracy:** { _acc(by1) } \n**Student 2 overall accuracy:** { _acc(by2) } \n"
s += "\n**Notes:** Lower-accuracy subtopics indicate targets for remediation. See Homework and Personalized Summaries below."
s1_rx = rx_json.get("student_1", {})
s2_rx = rx_json.get("student_2", {})
# generate summaries using performance + homework
s1_sum = summarize_student(api_key, model, by1, s1_rx)
s2_sum = summarize_student(api_key, model, by2, s2_rx)
return (
res1, res2, by1, by2,
table1, table2,
s, s1_rx, s2_rx,
s1_sum, s2_sum
)
grade_btn.click(
_grade_and_analyze,
inputs=[st_api_key, st_model, st_questions, st_student1_answers, st_student2_answers],
outputs=[ # order must match return above
st_grade1, st_grade2, st_perf1, st_perf2,
perf1_tbl, perf2_tbl,
report_md, hw1, hw2,
sum1_md, sum2_md
],
)
gr.Markdown("β€” Built using Gradio + OpenAI β€”")
if __name__ == "__main__":
# Set share=True to get a public link
demo.launch(share=True)