# gradio_edu_app_fixed.py
"""
Educational Text Tutor – Gradio App (Patched)
Fixes:
- Properly updates CheckboxGroup choices using gr.update(...)
- Dataframes use type="array" to ensure list-of-lists I/O
- Robust _apply_edits() to handle empty/short rows and parse errors
- Safer student answer table parsing
Enhancements:
- Personalized Study Summary per student on Analysis & Homework tab
- Profile-aware student simulation with targeted accuracy by subtopic category
Run:
pip install gradio openai
python gradio_edu_app_fixed.py
"""
import json
import uuid
import re
import random
from typing import List, Dict, Any, Tuple
import gradio as gr
# --- Utility: OpenAI call helper ------------------------------------------------
def _call_openai_chat(
api_key: str,
model: str,
messages: List[Dict[str, str]],
temperature: float = 0.2,
max_tokens: int = 2000,
) -> str:
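    """Send a chat request, preferring the Chat Completions API, falling back
    to the Responses API, then to the legacy openai<1.0 interface."""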
try:
from openai import OpenAI
client = OpenAI(api_key=api_key)
try:
resp = client.chat.completions.create(
model=model,
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
)
return resp.choices[0].message.content
except Exception:
# Fallback to Responses API
joined = "\n".join([f"{m['role'].upper()}: {m['content']}" for m in messages])
resp = client.responses.create(
model=model,
input=joined,
temperature=temperature,
max_output_tokens=max_tokens,
)
if hasattr(resp, "output_text"):
return resp.output_text
try:
return resp.choices[0].message.content # type: ignore[attr-defined]
except Exception:
return str(resp)
except ImportError:
import openai # type: ignore
openai.api_key = api_key
resp = openai.ChatCompletion.create( # type: ignore
model=model,
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
)
return resp["choices"][0]["message"]["content"]
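# Usage sketch (the key and model name below are placeholders; any
# chat-capable model should work):
#   reply = _call_openai_chat("sk-...", "gpt-4o-mini",
#                             [{"role": "user", "content": "Say hello."}])
#   print(reply)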
# --- Prompt templates (ALL literal braces escaped) ------------------------------
SUBTOPIC_PROMPT = """You are a curriculum designer.
Extract at least {min_subtopics} clear, non-overlapping subtopics from the EDUCATIONAL TEXT below.
Each subtopic should be concise (3–8 words), and together the subtopics should cover the main ideas.
Return ONLY valid JSON of the form:
{{
"subtopics": ["...", "...", "..."]
}}
EDUCATIONAL TEXT:
---
{source_text}
---
"""
QUESTION_PROMPT = """You are an assessment designer.
Create {n_per_subtopic} {qtype_desc} questions for EACH subtopic provided.
Vary difficulty around {difficulty} difficulty. Keep questions unambiguous and self-contained.
If question_type == "MCQ": provide *exactly four* options ("A","B","C","D") and the correct_key as one of "A"/"B"/"C"/"D".
If question_type == "Short Answer": provide a model_answer that is 1–3 sentences.
Return ONLY valid JSON in the following schema:
{{
"items": [
{{
"subtopic": "String",
"question_type": "{qtype}",
"question": "String",
"options": {{"A": "String", "B": "String", "C": "String", "D": "String"}} OR null,
"correct_key": "A|B|C|D" OR null,
"model_answer": "String" OR null
}},
...
]
}}
SUBTOPICS (the generator must cover these and label each item with the matching subtopic):
{selected_subtopics}
"""
# policy-aware simulation prompt (subtopic-aware)
SIMULATE_STUDENT_PROMPT = """You will roleplay as a student with this profile:
---
{student_profile}
---
**Policy (you MUST follow):**
{policy_json}
Guidelines:
- Use the **subtopic** of each question to decide where to excel vs. struggle.
- Hit the target accuracy ranges by category (strong/weak/neutral). If needed, deliberately pick a plausible but wrong choice. Never admit you're doing this.
- MCQ: answer ONLY the option key (A/B/C/D). Short Answer: 1–3 sentences; on weak areas, it's fine to be vague, omit a key detail, or introduce a misconception.
Return ONLY valid JSON:
{{
"answers": [
{{"id": "QUESTION_ID", "answer": "String"}},
...
]
}}
QUESTIONS (with IDs & subtopics):
{questions_json}
"""
GRADING_PROMPT = """You are a strict teacher using a clear rubric.
Grade each student answer against the provided key/model answer.
For MCQ: mark correct if the chosen key matches the correct_key.
For Short Answer: mark correct if the essential facts match (allow paraphrase), else incorrect.
Give a one-sentence rationale.
Return ONLY valid JSON with this schema:
{{
"results": [
{{
"id": "QUESTION_ID",
"subtopic": "String",
"is_correct": true/false,
"score": 1 or 0,
"rationale": "String"
}},
...
],
"by_subtopic": [
{{
"subtopic": "String",
"total": N,
"correct": M,
"accuracy": 0.0_to_1.0
}},
...
]
}}
QUESTIONS (with answers):
{questions_and_keys_json}
STUDENT ANSWERS:
{student_answers_json}
"""
PRESCRIPTION_PROMPT = """You are an expert tutor.
Based on the per-subtopic performance for two students, write:
1) A concise progress recap for each student (3–5 sentences).
2) A prioritized list of weak subtopics for each student (up to 5).
3) For each weak subtopic and each student, suggest a mini-homework plan: 3 concrete practice tasks (in increasing difficulty).
Return ONLY valid JSON:
{{
"student_1": {{
"recap": "String",
"weak_subtopics": ["..."],
"homework": [{{"subtopic":"String","tasks":["...","...","..."]}}]
}},
"student_2": {{
"recap": "String",
"weak_subtopics": ["..."],
"homework": [{{"subtopic":"String","tasks":["...","...","..."]}}]
}}
}}
PERFORMANCE SUMMARY (Student 1):
{perf_1_json}
PERFORMANCE SUMMARY (Student 2):
{perf_2_json}
"""
# Personalized study summary prompt
STUDY_SUMMARY_PROMPT = """You are a learning coach. Using the performance summary and the proposed homework for ONE student, write a short **personalized home-study summary** they can follow on their own.
Include, in order:
- **Strengths:** 2–3 quick bullets.
- **Weak spots:** 2–3 bullets naming subtopics (lowest accuracy first).
- **3 study goals** (clear, measurable).
- **7-day micro-plan:** Day 1 → Day 7 bullets (one action each).
- **Motivation tip** (1 sentence).
Constraints:
- Keep it concise: 120–180 words total.
- Use simple language and Markdown bullets.
- Do not mention accuracy numbers; just reflect them implicitly.
PERFORMANCE:
{perf_json}
HOMEWORK (may be empty):
{hw_json}
"""
# --- Core logic -----------------------------------------------------------------
def extract_subtopics(api_key: str, model: str, text: str, min_subtopics: int) -> List[str]:
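    """Ask the model for subtopics and return a deduplicated list.
    If fewer than `min_subtopics` come back, generic placeholders are
    appended; if the JSON cannot be parsed, each non-empty line of the
    raw reply is used instead."""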
if not api_key or not model:
raise gr.Error("Please enter your API key and select a model on the Setup tab.")
if not text.strip():
raise gr.Error("Please paste the educational text.")
msg = [
{"role": "system", "content": "You produce strictly valid JSON."},
{"role": "user", "content": SUBTOPIC_PROMPT.format(min_subtopics=min_subtopics, source_text=text.strip())},
]
raw = _call_openai_chat(api_key, model, msg, temperature=0.1)
try:
data = json.loads(raw)
subs = data.get("subtopics", [])
subs = [s.strip() for s in subs if isinstance(s, str) and s.strip()]
if len(subs) < min_subtopics:
extra_needed = min_subtopics - len(subs)
subs += [f"Additional Subtopic {i+1}" for i in range(extra_needed)]
seen, uniq = set(), []
for s in subs:
key = s.lower()
if key not in seen:
uniq.append(s)
seen.add(key)
return uniq
except Exception:
        # Fallback: treat each non-empty line of the reply as a subtopic.
        lines = [ln.strip("-• \t") for ln in raw.splitlines() if ln.strip()]
        return lines
def generate_questions(
api_key: str,
model: str,
selected_subtopics: List[str],
qtype: str,
n_per_subtopic: int,
difficulty: str
) -> List[Dict[str, Any]]:
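    """Generate questions for the selected subtopics and validate each item:
    MCQs must carry an options dict and a correct_key in A–D; short-answer
    items must carry a model_answer. Invalid items are dropped."""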
if not selected_subtopics:
raise gr.Error("Please select at least one subtopic in the Subtopics tab.")
qtype_desc = "multiple-choice (MCQ with 4 options)" if qtype == "MCQ" else "short-answer"
prompt = QUESTION_PROMPT.format(
n_per_subtopic=n_per_subtopic,
qtype_desc=qtype_desc,
difficulty=difficulty,
qtype=qtype,
selected_subtopics=json.dumps(selected_subtopics, ensure_ascii=False, indent=2),
)
msg = [
{"role": "system", "content": "You produce strictly valid JSON and follow the schema exactly."},
{"role": "user", "content": prompt},
]
raw = _call_openai_chat(api_key, model, msg, temperature=0.7, max_tokens=2800)
try:
data = json.loads(raw)
items = data.get("items", [])
except Exception:
raise gr.Error("The model did not return valid JSON for questions. Try again or reduce counts.")
questions: List[Dict[str, Any]] = []
for it in items:
qid = str(uuid.uuid4())
subtopic = (it.get("subtopic") or "").strip()
question_type = it.get("question_type") or qtype
question = (it.get("question") or "").strip()
options = it.get("options") or None
correct_key = it.get("correct_key") or None
model_answer = it.get("model_answer") or None
if question_type == "MCQ":
if not (isinstance(options, dict) and correct_key in {"A", "B", "C", "D"}):
continue
else:
if not model_answer:
continue
questions.append({
"id": qid,
"subtopic": subtopic,
"question_type": question_type,
"question": question,
"options": options,
"correct_key": correct_key,
"model_answer": model_answer,
})
return questions
# --- Policy helpers to force visible divergence between students ----------------
def _derive_policy(student_profile: str) -> Dict[str, Any]:
"""Infer strong/weak areas and target accuracies from a free-form profile."""
p = student_profile.lower()
strong_terms, weak_terms = set(), set()
# Heuristics from profile
if re.search(r"strong in (definitions?|theor(?:y|ies)|concepts?)", p):
strong_terms |= {"definition", "definitions", "theory", "theories", "concept", "concepts", "term", "terms"}
if re.search(r"weak(?:er)? in (definitions?|theor(?:y|ies)|concepts?)", p):
weak_terms |= {"definition", "definitions", "theory", "theories", "concept", "concepts", "term", "terms"}
if re.search(r"strong in (applications?|problem ?solving|calculations?)", p):
strong_terms |= {"application", "applications", "problem", "problems", "problem solving", "case", "cases", "calculation", "calculations", "practice"}
if re.search(r"weak(?:er)? in (applications?|problem ?solving|calculations?)", p):
weak_terms |= {"application", "applications", "problem", "problems", "problem solving", "case", "cases", "calculation", "calculations", "practice"}
# Generic defaults if not mentioned
if not strong_terms and "theor" in p:
strong_terms |= {"definition","concept","theory","term"}
if not weak_terms and "careless" in p:
weak_terms |= {"definition","term"} # careless β†’ slips on definitional precision
# Accuracy targets
overall = 0.65 # baseline realism
if "anxious" in p: overall -= 0.05
if "confident" in p: overall += 0.05
weak_acc = 0.45
strong_acc = 0.85
neutral_acc = overall
careless_rate = 0.15 if "careless" in p else 0.05
variance = 0.05 # small randomness
return {
"strong_terms": sorted(strong_terms),
"weak_terms": sorted(weak_terms),
"target_acc": {
"strong": strong_acc,
"weak": weak_acc,
"neutral": neutral_acc
},
"overall_target": overall,
"careless_rate": careless_rate,
"variance": variance
}
def _classify_subtopic(name: str, policy: Dict[str, Any]) -> str:
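    """Label a subtopic "strong", "weak", or "neutral" by keyword match
    against the policy's term lists (ties fall back to "neutral")."""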
s = (name or "").lower()
strong_hits = any(t in s for t in policy["strong_terms"])
weak_hits = any(t in s for t in policy["weak_terms"])
if weak_hits and not strong_hits:
return "weak"
if strong_hits and not weak_hits:
return "strong"
return "neutral"
def _wrong_option_letter(correct_key: str) -> str:
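    """Pick a random option letter other than the correct one."""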
pool = ["A","B","C","D"]
pool = [x for x in pool if x != (correct_key or "").upper()]
return random.choice(pool) if pool else "A"
def _enforce_profile_variation(
questions: List[Dict[str, Any]],
answers: List[Dict[str, Any]],
policy: Dict[str, Any]
) -> List[Dict[str, Any]]:
"""Post-process MCQ answers to meet target wrong-rate per category. Short answers untouched."""
# Indexing
q_by_id = {q["id"]: q for q in questions}
ans_by_id = {a["id"]: a["answer"] for a in answers}
# Collect MCQs per category
buckets = {"strong": [], "weak": [], "neutral": []}
for q in questions:
if q.get("question_type") != "MCQ":
continue
cat = _classify_subtopic(q.get("subtopic",""), policy)
buckets[cat].append(q["id"])
# For each category, compute current and target wrong counts
for cat, qids in buckets.items():
if not qids:
continue
target_acc = policy["target_acc"][cat]
# add small variance so runs don't look identical
target_acc += random.uniform(-policy["variance"], policy["variance"])
target_acc = max(0.2, min(0.95, target_acc))
total = len(qids)
desired_wrong = round(total * (1 - target_acc))
# Compute current wrongs
current_wrong = 0
        correct_candidates = []  # qids currently correct → can flip to wrong if needed
for qid in qids:
q = q_by_id[qid]
stu = (ans_by_id.get(qid) or "").strip().upper()
correct = (q.get("correct_key") or "").strip().upper()
if stu and correct and stu == correct:
correct_candidates.append(qid)
else:
current_wrong += 1
need_more_wrong = max(0, desired_wrong - current_wrong)
# Flip some correct ones to wrong
if need_more_wrong > 0 and correct_candidates:
random.shuffle(correct_candidates)
for qid in correct_candidates[:need_more_wrong]:
correct = (q_by_id[qid].get("correct_key") or "").strip().upper()
ans_by_id[qid] = _wrong_option_letter(correct)
        # Optional: occasionally flip one already-correct answer in this
        # category as a careless slip
        if random.random() < policy["careless_rate"]:
            qid = random.choice(qids)
            correct = (q_by_id[qid].get("correct_key") or "").strip().upper()
            if ans_by_id.get(qid, "").upper() == correct:
                ans_by_id[qid] = _wrong_option_letter(correct)
# Rebuild answers list
out = []
for a in answers:
qid = a["id"]
out.append({"id": qid, "answer": ans_by_id.get(qid, a["answer"])})
return out
def simulate_student_answers(
api_key: str,
model: str,
student_profile: str,
questions: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
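    """Roleplay the student via the LLM, then post-process the MCQ answers
    so per-category accuracy matches the profile-derived policy."""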
# Pack questions with subtopics so the model can bias performance
qpack = [
{
"id": q["id"],
"subtopic": q["subtopic"],
"question_type": q["question_type"],
"question": q["question"],
"options": q["options"],
} for q in questions
]
# Derive an explicit policy from the free-text profile
policy = _derive_policy(student_profile)
prompt = SIMULATE_STUDENT_PROMPT.format(
student_profile=student_profile.strip(),
policy_json=json.dumps(policy, ensure_ascii=False, indent=2),
questions_json=json.dumps(qpack, ensure_ascii=False, indent=2),
)
msg = [
{"role": "system", "content": "Return strictly valid JSON and keep answers realistic given the policy."},
{"role": "user", "content": prompt},
]
raw = _call_openai_chat(api_key, model, msg, temperature=0.8, max_tokens=3000)
try:
data = json.loads(raw)
answers = data.get("answers", [])
except Exception:
raise gr.Error("Failed to parse student answers JSON.")
# Normalize
normalized = []
for a in answers:
qid = a.get("id")
ans = (a.get("answer") or "").strip()
if qid and ans:
normalized.append({"id": qid, "answer": ans})
# Keep only answers for our questions
q_ids = {q["id"] for q in questions}
filtered = [a for a in normalized if a["id"] in q_ids]
# Enforce target variation to visibly differentiate students (MCQ-safe)
filtered = _enforce_profile_variation(questions, filtered, policy)
return filtered
def grade_student(
api_key: str,
model: str,
questions: List[Dict[str, Any]],
student_answers: List[Dict[str, Any]],
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
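    """Grade answers with the LLM; on a JSON parse failure, fall back to
    exact-key matching for MCQs and tally per-subtopic accuracy locally."""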
q_map = {q["id"]: q for q in questions}
bundle = []
for sa in student_answers:
qid = sa["id"]
if qid in q_map:
q = q_map[qid]
bundle.append({
"id": qid,
"subtopic": q["subtopic"],
"question_type": q["question_type"],
"question": q["question"],
"options": q["options"],
"correct_key": q.get("correct_key"),
"model_answer": q.get("model_answer"),
"student_answer": sa["answer"],
})
prompt = GRADING_PROMPT.format(
questions_and_keys_json=json.dumps(bundle, ensure_ascii=False, indent=2),
student_answers_json=json.dumps(student_answers, ensure_ascii=False, indent=2),
)
msg = [
{"role": "system", "content": "Return strictly valid JSON following the schema."},
{"role": "user", "content": prompt},
]
raw = _call_openai_chat(api_key, model, msg, temperature=0.0, max_tokens=3500)
try:
data = json.loads(raw)
results = data.get("results", [])
by_subtopic = data.get("by_subtopic", [])
for r in results:
r.setdefault("score", 1 if r.get("is_correct") else 0)
return results, by_subtopic
except Exception:
# Heuristic fallback (MCQ only)
results = []
tally = {}
for b in bundle:
is_correct = False
if b["question_type"] == "MCQ":
is_correct = (b["student_answer"].strip().upper() == (b.get("correct_key") or "").upper())
score = 1 if is_correct else 0
results.append({"id": b["id"], "subtopic": b["subtopic"], "is_correct": is_correct, "score": score, "rationale": "Heuristic fallback."})
t = tally.setdefault(b["subtopic"], {"subtopic": b["subtopic"], "total": 0, "correct": 0, "accuracy": 0.0})
t["total"] += 1
t["correct"] += score
for t in tally.values():
t["accuracy"] = round(t["correct"] / max(1, t["total"]), 3)
by_subtopic = list(tally.values())
return results, by_subtopic
def prescribe_homework(
api_key: str,
model: str,
perf1: List[Dict[str, Any]],
perf2: List[Dict[str, Any]],
) -> Dict[str, Any]:
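    """Ask the model for recaps, weak subtopics, and homework plans for both
    students; returns an empty-shell dict if the JSON cannot be parsed."""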
prompt = PRESCRIPTION_PROMPT.format(
perf_1_json=json.dumps(perf1, ensure_ascii=False, indent=2),
perf_2_json=json.dumps(perf2, ensure_ascii=False, indent=2),
)
msg = [
{"role": "system", "content": "Return strictly valid JSON exactly as requested."},
{"role": "user", "content": prompt},
]
raw = _call_openai_chat(api_key, model, msg, temperature=0.4, max_tokens=2200)
try:
data = json.loads(raw)
return data
except Exception:
return {
"student_1": {"recap": "N/A", "weak_subtopics": [], "homework": []},
"student_2": {"recap": "N/A", "weak_subtopics": [], "homework": []},
}
# Personalized study summary helper
def summarize_student(
api_key: str,
model: str,
perf: List[Dict[str, Any]],
rx_student: Dict[str, Any],
) -> str:
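    """Produce a short Markdown home-study summary for one student from the
    per-subtopic performance and (optionally) the prescribed homework."""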
prompt = STUDY_SUMMARY_PROMPT.format(
perf_json=json.dumps(perf, ensure_ascii=False, indent=2),
hw_json=json.dumps(rx_student or {}, ensure_ascii=False, indent=2),
)
msg = [
{"role": "system", "content": "Write concise Markdown only (no JSON, no preamble). Max ~180 words."},
{"role": "user", "content": prompt},
]
text = _call_openai_chat(api_key, model, msg, temperature=0.3, max_tokens=500)
return text.strip()
# --- Gradio UI ------------------------------------------------------------------
with gr.Blocks(css="footer {visibility: hidden}") as demo:
gr.Markdown("# πŸŽ“ Educational Tutor\nDesign subtopics β†’ generate questions β†’ simulate students β†’ analyze β†’ prescribe homework")
# App-wide state
st_api_key = gr.State("")
st_model = gr.State("gpt-4o-mini")
st_source_text = gr.State("")
st_subtopics = gr.State([]) # List[str]
st_selected_subtopics = gr.State([]) # List[str]
st_questions = gr.State([]) # List[dict]
st_student1_answers = gr.State([]) # List[dict]
st_student2_answers = gr.State([]) # List[dict]
st_grade1 = gr.State([]) # List[dict] results
st_grade2 = gr.State([])
st_perf1 = gr.State([]) # by_subtopic
st_perf2 = gr.State([])
    st_rx = gr.State({})  # prescriptions (currently unused; homework goes straight to the JSON outputs)
with gr.Tab("1) Setup"):
with gr.Row():
api_key_in = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")
model_in = gr.Dropdown(
label="Model",
choices=[
"gpt-4o-mini",
"gpt-4o",
"o4-mini",
"gpt-4.1-mini",
"gpt-4.1",
"gpt-3.5-turbo",
"gpt-4-turbo",
],
value="gpt-4o-mini",
allow_custom_value=True,
)
save_btn = gr.Button("Save Settings", variant="primary")
status = gr.Markdown("")
def _save_settings(api_key, model):
if not api_key or not model:
raise gr.Error("Please provide API key and a model.")
            return api_key, model, f"✅ Settings saved: **{model}**"
save_btn.click(
_save_settings,
inputs=[api_key_in, model_in],
outputs=[st_api_key, st_model, status],
)
with gr.Tab("2) Subtopics"):
source_text = gr.Textbox(lines=12, label="Paste Educational Text", placeholder="Paste the text students will learn...")
min_sub = gr.Slider(2, 20, value=5, step=1, label="Minimum number of subtopics")
extract_btn = gr.Button("Extract Subtopics", variant="primary")
subs_out = gr.CheckboxGroup(label="Select subtopics to include", choices=[])
def _extract(api_key, model, text, min_n):
subs = extract_subtopics(api_key, model, text, int(min_n))
return (
text,
subs,
gr.update(choices=subs, value=subs)
)
extract_btn.click(
_extract,
inputs=[st_api_key, st_model, source_text, min_sub],
outputs=[st_source_text, st_subtopics, subs_out],
)
def _select_subs(selected, available):
if not available:
return []
safe = [s for s in (selected or []) if s in available]
return safe
subs_out.change(
_select_subs,
inputs=[subs_out, st_subtopics],
outputs=st_selected_subtopics
)
with gr.Tab("3) Generate Questions"):
with gr.Row():
qtype = gr.Radio(["Short Answer", "MCQ"], value="MCQ", label="Question Type")
n_per_sub = gr.Slider(1, 10, value=3, step=1, label="Questions per selected subtopic")
difficulty = gr.Dropdown(["easy", "medium", "hard"], value="medium", label="Difficulty")
gen_btn = gr.Button("Generate Questions", variant="primary")
q_table = gr.Dataframe(
headers=["id","subtopic","question_type","question","options","correct_key","model_answer"],
row_count=(1, "dynamic"),
type="array",
label="Generated Questions"
)
hint = gr.Markdown("You can edit cells. For MCQ 'options', keep valid JSON, e.g. {\"A\":\"...\",\"B\":\"...\",\"C\":\"...\",\"D\":\"...\"}")
def _gen_q(api_key, model, selected, qtype_value, n, diff):
qtype_norm = "MCQ" if qtype_value == "MCQ" else "Short Answer"
qs = generate_questions(api_key, model, selected or [], qtype_norm, int(n), diff)
rows = []
for q in qs:
rows.append([
q.get("id"),
q.get("subtopic"),
q.get("question_type"),
q.get("question"),
json.dumps(q.get("options"), ensure_ascii=False) if q.get("options") else None,
q.get("correct_key"),
q.get("model_answer"),
])
return qs, rows
gen_btn.click(
_gen_q,
inputs=[st_api_key, st_model, st_selected_subtopics, qtype, n_per_sub, difficulty],
outputs=[st_questions, q_table],
)
def _apply_edits(df):
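            """Rebuild the question dicts from edited dataframe rows, padding
            short rows and tolerating malformed 'options' JSON."""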
qs = []
if not isinstance(df, list):
return qs
for row in df:
if not row:
continue
row = list(row) + [None] * (7 - len(row))
row = row[:7]
qid, subtopic, qtype_v, question, options_raw, correct_key, model_answer = row
if not (qid and question):
continue
options = None
if isinstance(options_raw, str) and options_raw.strip():
try:
parsed = json.loads(options_raw)
if isinstance(parsed, dict):
options = parsed
except Exception:
options = None
elif isinstance(options_raw, dict):
options = options_raw
qs.append({
"id": qid,
"subtopic": subtopic,
"question_type": qtype_v,
"question": question,
"options": options,
"correct_key": correct_key,
"model_answer": model_answer,
})
return qs
q_table.change(_apply_edits, inputs=q_table, outputs=st_questions)
with gr.Tab("4) Simulate Students"):
gr.Markdown("Provide brief profiles. The model will answer as each persona.")
s1 = gr.Textbox(label="Student 1 Profile", value="Diligent but anxious test-taker. Strong in theory, weaker in applications.")
s2 = gr.Textbox(label="Student 2 Profile", value="Confident and fast, sometimes careless. Strong in applications, weaker in definitions.")
sim_btn = gr.Button("Simulate Answers", variant="primary")
s1_table = gr.Dataframe(headers=["question_id","answer"], row_count=(1, "dynamic"), type="array", label="Student 1 Answers (editable)")
s2_table = gr.Dataframe(headers=["question_id","answer"], row_count=(1, "dynamic"), type="array", label="Student 2 Answers (editable)")
def _simulate(api_key, model, prof1, prof2, qs):
if not qs:
raise gr.Error("No questions generated yet.")
a1 = simulate_student_answers(api_key, model, prof1, qs)
a2 = simulate_student_answers(api_key, model, prof2, qs)
rows1 = [[x["id"], x["answer"]] for x in a1]
rows2 = [[x["id"], x["answer"]] for x in a2]
return a1, a2, rows1, rows2
sim_btn.click(
_simulate,
inputs=[st_api_key, st_model, s1, s2, st_questions],
outputs=[st_student1_answers, st_student2_answers, s1_table, s2_table],
)
def _apply_s_answers(df):
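            """Parse edited answer rows back into [{"id", "answer"}, ...],
            skipping empty or too-short rows."""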
out = []
if not isinstance(df, list):
return out
for r in df:
if not r or len(r) < 2:
continue
qid = r[0]
ans = r[1]
if qid and ans is not None:
out.append({"id": qid, "answer": str(ans)})
return out
s1_table.change(_apply_s_answers, inputs=s1_table, outputs=st_student1_answers)
s2_table.change(_apply_s_answers, inputs=s2_table, outputs=st_student2_answers)
with gr.Tab("5) Analysis & Homework"):
grade_btn = gr.Button("Grade & Analyze", variant="primary")
with gr.Row():
            perf1_tbl = gr.Dataframe(headers=["subtopic","total","correct","accuracy"], row_count=(1, "dynamic"), type="array", label="Student 1 – Per-Subtopic Performance")
            perf2_tbl = gr.Dataframe(headers=["subtopic","total","correct","accuracy"], row_count=(1, "dynamic"), type="array", label="Student 2 – Per-Subtopic Performance")
report_md = gr.Markdown()
        hw1 = gr.JSON(label="Student 1 – Homework Plan")
        hw2 = gr.JSON(label="Student 2 – Homework Plan")
# Personalized study summaries
gr.Markdown("### Student 1 – Personalized Study Summary")
sum1_md = gr.Markdown()
gr.Markdown("### Student 2 – Personalized Study Summary")
sum2_md = gr.Markdown()
def _grade_and_analyze(api_key, model, qs, a1, a2):
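            """Grade both students, build the per-subtopic tables, prescribe
            homework, and generate the personalized study summaries."""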
if not qs or not a1 or not a2:
raise gr.Error("Need questions and both students' answers first.")
res1, by1 = grade_student(api_key, model, qs, a1)
res2, by2 = grade_student(api_key, model, qs, a2)
table1 = [[b["subtopic"], b["total"], b["correct"], b["accuracy"]] for b in by1]
table2 = [[b["subtopic"], b["total"], b["correct"], b["accuracy"]] for b in by2]
def _acc(by):
if not by: return 0.0
num = sum(b.get("correct", 0) for b in by)
den = sum(b.get("total", 0) for b in by)
return round(num / max(1, den), 3)
rx_json = prescribe_homework(api_key, model, by1, by2)
s = f"**Student 1 overall accuracy:** { _acc(by1) } \n**Student 2 overall accuracy:** { _acc(by2) } \n"
s += "\n**Notes:** Lower-accuracy subtopics indicate targets for remediation. See Homework and Personalized Summaries below."
s1_rx = rx_json.get("student_1", {})
s2_rx = rx_json.get("student_2", {})
# generate summaries using performance + homework
s1_sum = summarize_student(api_key, model, by1, s1_rx)
s2_sum = summarize_student(api_key, model, by2, s2_rx)
return (
res1, res2, by1, by2,
table1, table2,
s, s1_rx, s2_rx,
s1_sum, s2_sum
)
grade_btn.click(
_grade_and_analyze,
inputs=[st_api_key, st_model, st_questions, st_student1_answers, st_student2_answers],
outputs=[ # order must match return above
st_grade1, st_grade2, st_perf1, st_perf2,
perf1_tbl, perf2_tbl,
report_md, hw1, hw2,
sum1_md, sum2_md
],
)
gr.Markdown("β€” Built using Gradio + OpenAI β€”")
if __name__ == "__main__":
# Set share=True to get a public link
demo.launch(share=True)