Spaces:

DeepLearningAI
/

quiz-generator-v3

Sleeping

App Files Files Community

quiz-generator-v3 / ui /edit_handlers.py

ecuartasm

check yml

2ac15a9 about 1 month ago

raw

history blame contribute delete

18.9 kB

	import re
	import json
	import tempfile
	import yaml
	import gradio as gr
	from openai import OpenAI
	from pydantic import BaseModel
	from typing import List
	from .run_manager import get_run_manager


	# ---------------------------------------------------------------------------
	# Pydantic model for LLM validation response
	# ---------------------------------------------------------------------------

	class _QuestionValidation(BaseModel):
	    """Structured reply requested from the LLM when it reviews one question.

	    The class is handed to the OpenAI ``parse`` call as ``response_format``,
	    so its field names are the keys the model is asked to return.
	    """

	    # Overall verdict; the caller only collects `issues` when this is False.
	    is_valid: bool
	    # One human-readable string per problem the reviewer reported.
	    issues: List[str]


	# ---------------------------------------------------------------------------
	# Helpers
	# ---------------------------------------------------------------------------

	def _next_button_label(index, total):
	    """Build the Gradio update that sets the caption of the advance button.

	    The caption is 'Accept & Finish' when ``index`` points at (or past) the
	    last of ``total`` questions, and 'Accept & Next' in every other case,
	    including when ``total`` is zero or negative.
	    """
	    on_last_question = total > 0 and index >= total - 1
	    caption = "Accept & Finish" if on_last_question else "Accept & Next"
	    return gr.update(value=caption)


	def _sanitize_text(text, keep_bullets: bool = False) -> str:
	"""Normalize Unicode typography then strip any remaining non-ASCII characters.

	Only standard printable ASCII (32-126), newlines, and tabs are kept.
	Set keep_bullets=True to additionally preserve the bullet chars (• ◦) used
	as structural markers in the .md editing format.
	"""
	if not text:
	return text

	# Normalize common Unicode typography to ASCII equivalents
	_REPLACEMENTS = {
	'\u2018': "'", '\u2019': "'", # ' ' (smart single quotes)
	'\u201c': '"', '\u201d': '"', # " " (smart double quotes)
	'\u2013': '-', '\u2014': '-', # – — (en/em dashes)
	'\u2026': '...', # … (ellipsis)
	'\u00a0': ' ', # non-breaking space
	'\u00b2': '2', '\u00b3': '3', # superscript digits
	}
	for uc, rep in _REPLACEMENTS.items():
	text = text.replace(uc, rep)

	# Chars always allowed: printable ASCII + newline + tab
	def _allowed(c: str) -> bool:
	if 32 <= ord(c) <= 126 or c in '\n\t':
	return True
	if keep_bullets and c in '\u2022\u25e6': # • ◦
	return True
	return False

	return ''.join(c for c in text if _allowed(c))


	# ---------------------------------------------------------------------------
	# Markdown parsing
	# ---------------------------------------------------------------------------

	def _parse_questions(md_content: str) -> List[str]:
	"""Split formatted_quiz.md content into individual question blocks."""
	parts = re.split(r'(?=\\Question \d+)', md_content.strip())
	return [p.strip() for p in parts if p.strip()]


	def _parse_question_block(block_text: str) -> dict:
	"""Parse a single markdown question block into structured data.

	Supports multi-line prompts: non-empty lines between the question header
	and the first option are accumulated as additional prompt text.
	"""
	prompt_lines: List[str] = []
	options: List[dict] = []
	current_option = None
	in_prompt = False

	for line in block_text.split('\n'):
	stripped = line.strip()

	# Question header (colon may be inside or outside bold markers)
	q_match = re.match(r'\\Question \d+.?\\:?\s(.*)', stripped)
	if q_match:
	first_line = q_match.group(1).strip()
	if first_line:
	prompt_lines.append(first_line)
	in_prompt = True
	continue

	# Skip ranking reasoning line and stop prompt accumulation
	if stripped.startswith('Ranking Reasoning:'):
	in_prompt = False
	continue

	# Option line: • A [Correct]: text or • A: text
	opt_match = re.match(r'•\s([A-D])\s(\[Correct\])?\s:\s(.+)', stripped)
	if opt_match:
	in_prompt = False
	if current_option:
	options.append(current_option)
	current_option = {
	'answer': opt_match.group(3).strip(),
	'isCorrect': opt_match.group(2) is not None,
	'feedback': ''
	}
	continue

	# Feedback line
	fb_match = re.match(r'◦\sFeedback:\s(.+)', stripped)
	if fb_match and current_option:
	current_option['feedback'] = fb_match.group(1).strip()
	continue

	# Accumulate additional prompt lines
	if in_prompt and stripped:
	prompt_lines.append(stripped)

	if current_option:
	options.append(current_option)

	return {'prompt': '\n'.join(prompt_lines), 'options': options}


	# ---------------------------------------------------------------------------
	# YAML generation
	# ---------------------------------------------------------------------------

	def _generate_yml(questions_data: List[dict]) -> str:
	    """Generate YAML quiz text from parsed question data.

	    ``questions_data`` is a list of dicts shaped like the result of
	    ``_parse_question_block``: a ``prompt`` string and an ``options`` list
	    whose items carry ``answer``, ``isCorrect`` and ``feedback``.

	    All text fields (prompt, answer, feedback) are written as '|-' block
	    scalars and are passed through ``_sanitize_text`` first, so they hold
	    only standard printable ASCII characters. Nesting uses two spaces per
	    level so the result is a valid YAML document ending in a newline.
	    """
	    def block_scalar(key_line: str, text, content_indent: int) -> List[str]:
	        # Emit "<key>: |-" followed by the sanitized text, one line per row.
	        # Blank rows are written empty rather than as whitespace-only lines.
	        pad = ' ' * content_indent
	        rows = (_sanitize_text(text) or '').split('\n')
	        return [f"{key_line} |-"] + [f"{pad}{row}" if row else '' for row in rows]

	    lines = [
	        "name: Quiz 1",
	        "passingThreshold: 5",
	        "estimatedTimeSec: 600",
	        "maxTrialsPer24Hrs: 3",
	        "courseSlug: course_Slug",
	        "insertAfterConclusion: true",
	        "RandomQuestionPosition: true",
	        "questions:",
	    ]

	    for q in questions_data:
	        lines.append("  - typeName: multipleChoice")
	        lines.append("    points: 1")
	        lines.append("    shuffle: true")
	        lines.extend(block_scalar("    prompt:", q['prompt'], 6))
	        lines.append("    options:")
	        for opt in q['options']:
	            is_correct = 'true' if opt['isCorrect'] else 'false'
	            lines.extend(block_scalar("      - answer:", opt['answer'], 10))
	            lines.append(f"        isCorrect: {is_correct}")
	            lines.extend(block_scalar("        feedback:", opt['feedback'], 10))

	    return '\n'.join(lines) + '\n'


	# ---------------------------------------------------------------------------
	# YAML loading (converts any valid YAML quiz to md blocks)
	# ---------------------------------------------------------------------------

	def _parse_yml_to_md_blocks(yml_content: str):
	    """Parse a YAML quiz file into Markdown question blocks.

	    Handles both '|-' block scalars and quoted-string answer formats since
	    PyYAML normalizes both to plain Python strings.

	    The input comes from a user upload, so its shape is checked step by
	    step: the document must be a mapping, ``questions`` a non-empty list of
	    mappings, and each ``options`` value (when present) a list of mappings.
	    Missing or null text fields are treated as empty strings. Only the
	    first four options of a question are used (letters A-D).

	    Multi-line prompts keep their lines. Multi-line answers and feedback
	    are folded onto one line, because the block format holds each of them
	    on a single line.

	    Returns (blocks, error_message). On success error_message is None; on
	    failure blocks is None.
	    """
	    def as_text(value) -> str:
	        # A null YAML value must not turn into the literal text "None".
	        return '' if value is None else str(value).strip()

	    def one_line(value) -> str:
	        # Join non-blank lines with a single space.
	        return ' '.join(p.strip() for p in as_text(value).splitlines() if p.strip())

	    try:
	        data = yaml.safe_load(yml_content)
	    except yaml.YAMLError as e:
	        return None, f"Failed to parse YAML: {e}"

	    if not isinstance(data, dict):
	        return None, "Invalid YAML structure: expected a mapping at the top level."

	    questions = data.get('questions') or []
	    if not questions:
	        return None, "No questions found in the YAML file."
	    if not isinstance(questions, list):
	        return None, "Invalid YAML structure: 'questions' must be a list."

	    option_letters = ['A', 'B', 'C', 'D']
	    blocks = []

	    for i, q in enumerate(questions, start=1):
	        if not isinstance(q, dict):
	            return None, f"Invalid YAML structure: question {i} is not a mapping."

	        options = q.get('options') or []
	        if not isinstance(options, list):
	            return None, f"Invalid YAML structure: 'options' of question {i} must be a list."

	        prompt_lines = as_text(q.get('prompt')).split('\n')
	        first_line = prompt_lines[0] if prompt_lines else ''
	        extra_lines = [l.strip() for l in prompt_lines[1:] if l.strip()]

	        block_lines = [f"Question {i}: {first_line}"]
	        block_lines.extend(extra_lines)
	        block_lines.append("")

	        # zip() stops after the fourth option, like the letter list it pairs with
	        for letter, opt in zip(option_letters, options):
	            if not isinstance(opt, dict):
	                return None, (
	                    f"Invalid YAML structure: option {letter} of question {i} "
	                    "is not a mapping."
	                )
	            answer = one_line(opt.get('answer'))
	            feedback = one_line(opt.get('feedback'))
	            correct_marker = " [Correct]" if opt.get('isCorrect', False) else ""

	            block_lines.append(f"\t• {letter}{correct_marker}: {answer}")
	            if feedback:
	                block_lines.append(f"\t ◦ Feedback: {feedback}")
	            block_lines.append("")

	        blocks.append('\n'.join(block_lines).strip())

	    return blocks, None


	# ---------------------------------------------------------------------------
	# LLM validation
	# ---------------------------------------------------------------------------

	def _validate_question_block(block_text: str) -> List[str]:
	    """Validate a question block structurally, then with an LLM semantic check.

	    Returns a list of issue strings. An empty list means the question is valid.
	    When structural issues exist they are returned on their own and the LLM
	    is not called. Otherwise the list holds whatever the LLM reported, so a
	    single call never mixes the two kinds of issue.

	    The LLM step is best effort: if the request fails or yields no parsed
	    reply, the failure is logged and the question is treated as having no
	    semantic issues, so an unavailable LLM never blocks saving.
	    """
	    # Local import: this module has no logger of its own.
	    import logging

	    parsed = _parse_question_block(block_text)

	    # --- Structural validation (fast, no API call) ---
	    issues = _structural_issues(parsed)
	    if issues:
	        # Don't call the LLM if the question is structurally broken
	        return issues

	    # --- LLM semantic validation ---
	    options_text = "\n".join(
	        f"{'[CORRECT] ' if o['isCorrect'] else ''}Answer: {o['answer']}\n"
	        f"Feedback: {o['feedback']}"
	        for o in parsed['options']
	    )
	    prompt = (
	        "You are an educational quality reviewer. Evaluate this multiple-choice question.\n\n"
	        f"Question: {parsed['prompt']}\n\n"
	        f"{options_text}\n\n"
	        "Check for: (1) clarity and unambiguity of the question, "
	        "(2) factual correctness of the marked answer, "
	        "(3) plausibility but clear incorrectness of the distractors, "
	        "(4) accuracy and helpfulness of the feedback for each option.\n"
	        'Return JSON with schema: {"is_valid": bool, "issues": ["issue1", ...]}'
	    )
	    try:
	        client = OpenAI()
	        result = client.beta.chat.completions.parse(
	            model="gpt-4o-mini",
	            messages=[{"role": "user", "content": prompt}],
	            response_format=_QuestionValidation,
	        )
	        validation = result.choices[0].message.parsed
	    except Exception:
	        # Never block saving if the LLM is unavailable, but leave a trace.
	        logging.getLogger(__name__).warning(
	            "LLM question validation skipped", exc_info=True
	        )
	        return issues

	    # `parsed` can be None when the model returns no structured reply.
	    if validation is not None and not validation.is_valid and validation.issues:
	        issues.extend(validation.issues)

	    return issues


	def _structural_issues(parsed: dict) -> List[str]:
	    """List the format problems of an already parsed question block."""
	    issues: List[str] = []

	    if not parsed['prompt'].strip():
	        issues.append("Missing question prompt.")

	    n_opts = len(parsed['options'])
	    if n_opts != 4:
	        issues.append(f"Expected 4 answer options, found {n_opts}.")
	        return issues

	    # Per-option checks only make sense once the option count is right.
	    correct_count = sum(1 for o in parsed['options'] if o['isCorrect'])
	    if correct_count == 0:
	        issues.append("No option is marked as correct. Add [Correct] to one option.")
	    elif correct_count > 1:
	        issues.append(f"{correct_count} options are marked correct; exactly 1 is required.")
	    for i, opt in enumerate(parsed['options']):
	        letter = chr(65 + i)
	        if not opt['answer'].strip():
	            issues.append(f"Option {letter} has no answer text.")
	        if not opt['feedback'].strip():
	            issues.append(f"Option {letter} is missing feedback.")

	    return issues


	# ---------------------------------------------------------------------------
	# Public handlers (called by ui/app.py)
	# ---------------------------------------------------------------------------

	def load_quiz_for_editing(formatted_quiz_text: str = ""):
	    """Load the generated quiz for editing. Tries disk first, falls back to UI text.

	    The latest formatted quiz path is requested from the run manager. If
	    there is no such path, the file cannot be read, or it is empty, the
	    text passed in ``formatted_quiz_text`` is used instead.

	    Returns a 6-tuple: status message, text of the first question, the list
	    of question blocks, the current index (0), a separate copy of that list
	    to hold edits, and the update for the advance button.
	    """
	    run_manager = get_run_manager()
	    content = None

	    quiz_path = run_manager.get_latest_formatted_quiz_path()
	    if quiz_path is not None:
	        try:
	            with open(quiz_path, "r", encoding="utf-8") as f:
	                content = f.read()
	        except (OSError, UnicodeDecodeError):
	            # An unreadable file must not crash the handler; use the UI text.
	            content = None

	    if not content and formatted_quiz_text:
	        content = formatted_quiz_text

	    if not content:
	        return (
	            "No formatted quiz found. Generate questions in the 'Generate Questions' tab first.",
	            "", [], 0, [], gr.update(),
	        )

	    questions = _parse_questions(content)
	    if not questions:
	        return "The quiz file is empty.", "", [], 0, [], gr.update()

	    # Edits are tracked in a copy so the originals stay untouched.
	    edited = list(questions)
	    return (
	        f"Question 1 of {len(questions)}",
	        questions[0], questions, 0, edited,
	        _next_button_label(0, len(questions)),
	    )


	def load_file_for_editing(file_path):
	    """Start the editing flow from a user-uploaded .md or .yml/.yaml quiz file.

	    The file is read as UTF-8 and converted to question blocks according to
	    its extension (case-insensitive). Every failure returns the same 6-tuple
	    shape as success, with a message in the first slot and empty state.

	    On success the tuple holds: status message, text of the first question,
	    the list of question blocks, index 0, a copy of the list for edits, and
	    the update for the advance button.
	    """
	    def failure(message):
	        # Shared shape of every unsuccessful result.
	        return message, "", [], 0, [], gr.update()

	    if file_path is None:
	        return failure("No file uploaded.")

	    try:
	        with open(file_path, 'r', encoding='utf-8') as handle:
	            content = handle.read()
	    except Exception as e:
	        return failure(f"Error reading file: {e}")

	    lowered_name = str(file_path).lower()

	    if lowered_name.endswith(('.yml', '.yaml')):
	        questions, error = _parse_yml_to_md_blocks(content)
	        if error:
	            return failure(error)
	    elif lowered_name.endswith('.md'):
	        questions = _parse_questions(content)
	        if not questions:
	            return failure("No questions found in the Markdown file.")
	    else:
	        return failure("Unsupported file format. Please upload a .md or .yml file.")

	    if not questions:
	        return failure("No questions found in the file.")

	    total = len(questions)
	    working_copy = list(questions)
	    status = f"Loaded {total} question(s) from file. Showing Question 1 of {total}."
	    return (
	        status,
	        questions[0], questions, 0, working_copy,
	        _next_button_label(0, total),
	    )


	def accept_and_next(current_text: str, questions: list, index: int, edited: list):
	    """Validate current question, then save and advance to the next one.

	    Structural errors block advancement. LLM semantic issues are surfaced as
	    warnings but still allow the user to proceed.

	    Returns the 6-tuple used by the editing handlers: status message, text
	    to show in the editor, the question list, the (possibly advanced) index,
	    the edited list, and the update for the advance button.
	    """
	    if not questions:
	        return "No quiz loaded.", "", questions, index, edited, gr.update()

	    # --- Validate before saving ---
	    issues = _validate_question_block(current_text)

	    # Separate structural issues (must be fixed) from LLM warnings.
	    # The validator's structural messages are matched in full, so an LLM
	    # warning that merely contains similar wording cannot block the user.
	    # Keep these patterns in step with the messages the validator emits.
	    structural_re = re.compile("|".join((
	        r"Missing question prompt\.",
	        r"Expected 4 answer options, found \d+\.",
	        r"No option is marked as correct\. Add \[Correct\] to one option\.",
	        r"\d+ options are marked correct; exactly 1 is required\.",
	        r"Option [A-Z] has no answer text\.",
	        r"Option [A-Z] is missing feedback\.",
	    )))
	    structural_issues = [msg for msg in issues if structural_re.fullmatch(msg)]
	    llm_warnings = [msg for msg in issues if not structural_re.fullmatch(msg)]

	    if structural_issues:
	        error_msg = "Cannot advance — please fix: " + "; ".join(structural_issues)
	        return (
	            error_msg, current_text, questions, index, edited,
	            _next_button_label(index, len(questions)),
	        )

	    # Save the (valid) edit
	    edited[index] = current_text

	    if index + 1 < len(questions):
	        new_index = index + 1
	        base_status = f"Question {new_index + 1} of {len(questions)}"
	        if llm_warnings:
	            base_status += f" | WARNING (previous Q): {'; '.join(llm_warnings)}"
	        return (
	            base_status, edited[new_index], questions, new_index, edited,
	            _next_button_label(new_index, len(questions)),
	        )

	    # Last question: stay on it and point the user at the download step.
	    base_status = f"All {len(questions)} questions reviewed. Click 'Download edited quiz' to save."
	    if llm_warnings:
	        base_status += f" | WARNING: {'; '.join(llm_warnings)}"
	    return (
	        base_status, current_text, questions, index, edited,
	        gr.update(value="Accept & Finish"),
	    )


	def go_previous(current_text: str, questions: list, index: int, edited: list):
	    """Store the text being edited, then step back one question if possible.

	    No validation is performed here. When already on the first question the
	    position is kept and the status message says so.

	    Returns the 6-tuple used by the editing handlers: status message, text
	    to show in the editor, the question list, the resulting index, the
	    edited list, and the update for the advance button.
	    """
	    if not questions:
	        return "No quiz loaded.", "", questions, index, edited, gr.update()

	    edited[index] = current_text
	    total = len(questions)

	    # Nothing before the first question: stay where we are.
	    if index <= 0:
	        status = f"Question 1 of {total} (already at first question)"
	        return (
	            status, current_text, questions, index, edited,
	            _next_button_label(index, total),
	        )

	    target = index - 1
	    status = f"Question {target + 1} of {total}"
	    return (
	        status, edited[target], questions, target, edited,
	        _next_button_label(target, total),
	    )


	def save_and_download(current_text: str, questions: list, index: int, edited: list):
	    """Validate all questions structurally, then join, sanitize, and export.

	    The text currently in the editor is stored first, so an edit that was
	    never accepted is still exported. Every block must then have a prompt,
	    exactly four options, exactly one correct option and feedback on each
	    option; otherwise nothing is written and the problems are listed.

	    On success the Markdown and YAML versions are saved through the run
	    manager and also written to temporary files for the Gradio download.

	    Returns ``(status_message, paths)`` where ``paths`` is a list holding the
	    temporary .md and .yml file names, or ``None`` when the export did not
	    happen.
	    """
	    if not edited:
	        return "No edited questions to save.", None

	    # Save the current edit in case user did not click Accept.
	    # The guard keeps a stale index from raising IndexError.
	    if 0 <= index < len(edited):
	        edited[index] = current_text

	    # Parse every block once; the result feeds both validation and export.
	    questions_data = [_parse_question_block(block) for block in edited]

	    # --- Structural validation of every question before export ---
	    all_errors: List[str] = []
	    for i, parsed in enumerate(questions_data, start=1):
	        q_errors: List[str] = []
	        if not parsed['prompt'].strip():
	            q_errors.append("missing prompt")
	        if len(parsed['options']) != 4:
	            q_errors.append(f"expected 4 options, found {len(parsed['options'])}")
	        else:
	            correct_count = sum(1 for o in parsed['options'] if o['isCorrect'])
	            if correct_count != 1:
	                q_errors.append(f"expected 1 correct option, found {correct_count}")
	            for j, opt in enumerate(parsed['options']):
	                if not opt['feedback'].strip():
	                    q_errors.append(f"option {chr(65+j)} missing feedback")
	        if q_errors:
	            all_errors.append(f"Question {i}: {'; '.join(q_errors)}")

	    if all_errors:
	        return "Export blocked — fix these issues first:\n" + "\n".join(all_errors), None

	    # --- Build outputs ---
	    # .md: sanitize text content but keep bullet markers (• ◦) for readability
	    combined_md = _sanitize_text("\n\n".join(edited) + "\n", keep_bullets=True)

	    # .yml: fully sanitized via _generate_yml
	    yml_content = _generate_yml(questions_data)

	    # Save to output folder
	    run_manager = get_run_manager()
	    saved_path = run_manager.save_edited_quiz(combined_md, "formatted_quiz_edited.md")
	    run_manager.save_edited_quiz(yml_content, "formatted_quiz_edited.yml")

	    def write_temp(suffix: str, text: str) -> str:
	        # delete=False keeps the file on disk after the handle is closed so
	        # Gradio can serve it; `with` closes the handle even if write fails.
	        with tempfile.NamedTemporaryFile(
	            delete=False, suffix=suffix, mode="w", encoding="utf-8"
	        ) as tmp:
	            tmp.write(text)
	        return tmp.name

	    # Temp files for Gradio download
	    md_path = write_temp(".md", combined_md)
	    yml_path = write_temp(".yml", yml_content)

	    status = f"Saved to {saved_path}" if saved_path else "Download ready."
	    return status, [md_path, yml_path]