# quiz-generator-v3 — ui/objective_handlers.py
# (Hugging Face page residue removed: uploader "ecuartasm", commit "new lo", hash 0b17ac5)
import os
import re
import json
import shutil
from typing import List
from models.learning_objectives import LearningObjective
from .content_processor import ContentProcessor
from quiz_generator import QuizGenerator
from .state import get_processed_contents, set_processed_contents, set_learning_objectives
from .run_manager import get_run_manager
from .question_handlers import generate_questions
def process_files(files, num_objectives, num_runs, model_name, incorrect_answer_model_name, temperature):
    """Process uploaded files and generate learning objectives.

    Pipeline:
        1. Validate inputs (files present, API key set, paths exist).
        2. Extract text content from the uploaded course-material files.
        3. Generate base objectives across ``num_runs`` independent runs.
        4. Group the pooled objectives and add incorrect answers to the
           best-in-group ones.
        5. Format, persist, and return the results.

    Args:
        files: Uploaded files (list of paths, single path string, or a
            file-like object with a ``name`` attribute).
        num_objectives: Number of objectives requested per run.
        num_runs: Number of independent generation runs to pool.
        model_name: Model used for objective generation.
        incorrect_answer_model_name: Model used for incorrect-answer generation.
        temperature: Sampling temperature (coerced to float).

    Returns:
        4-tuple ``(status, best_in_group_json, grouped_json, raw_json)``;
        the three JSON slots are None when validation fails.
    """
    run_manager = get_run_manager()
    # --- Input validation ---
    if not files:
        return "Please upload at least one file.", None, None, None
    if not os.getenv("OPENAI_API_KEY"):
        return "OpenAI API key not found. Please set the OPENAI_API_KEY environment variable.", None, None, None
    # Normalize the various input shapes into a list of real filesystem paths
    file_paths = _extract_file_paths(files)
    if not file_paths:
        return "No valid files found. Please upload valid .ipynb, .vtt, .srt, or .md files.", None, None, None
    # Start run and logging (creates a run ID used to save output artifacts)
    # NOTE(review): run_id is currently unused below; kept for parity with the
    # start_objective_run return value — confirm whether it can be dropped.
    run_id = run_manager.start_objective_run(
        files=file_paths,
        num_objectives=num_objectives,
        num_runs=num_runs,
        model=model_name,
        incorrect_answer_model=incorrect_answer_model_name,
        temperature=temperature
    )
    run_manager.log(f"Processing {len(file_paths)} files: {[os.path.basename(f) for f in file_paths]}", level="DEBUG")
    # Extract text from the uploaded files
    processor = ContentProcessor()
    file_contents = processor.process_files(file_paths)
    if not file_contents:
        run_manager.log("No content extracted from the uploaded files", level="ERROR")
        return "No content extracted from the uploaded files.", None, None, None
    run_manager.log(f"Successfully extracted content from {len(file_contents)} files", level="INFO")
    # Store file contents in module state for the regenerate/question flows
    set_processed_contents(file_contents)
    # Generate learning objectives
    run_manager.log(f"Creating QuizGenerator with model={model_name}, temperature={temperature}", level="INFO")
    quiz_generator = QuizGenerator(
        api_key=os.getenv("OPENAI_API_KEY"),
        model=model_name,
        temperature=float(temperature)
    )
    all_learning_objectives = _generate_multiple_runs(
        quiz_generator, file_contents, num_objectives, num_runs, incorrect_answer_model_name, run_manager
    )
    # Group duplicates across runs and add incorrect answers to best-in-group
    grouped_result = _group_base_objectives_add_incorrect_answers(
        quiz_generator, all_learning_objectives, file_contents, incorrect_answer_model_name, run_manager
    )
    # Format results for display (status string + three JSON payloads)
    formatted_results = _format_objective_results(grouped_result, all_learning_objectives, num_objectives, run_manager)
    # Store grouped objectives in module state for question generation
    set_learning_objectives(grouped_result["all_grouped"])
    # Save outputs (plus the parameters used) into the run directory
    params = {
        "files": [os.path.basename(f) for f in file_paths],
        "num_objectives": num_objectives,
        "num_runs": num_runs,
        "model": model_name,
        "incorrect_answer_model": incorrect_answer_model_name,
        "temperature": temperature
    }
    run_manager.save_objectives_outputs(
        best_in_group=formatted_results[1],
        all_grouped=formatted_results[2],
        raw_ungrouped=formatted_results[3],
        params=params
    )
    # End run
    run_manager.end_run(run_type="Learning Objectives")
    return formatted_results
def regenerate_objectives(objectives_json, feedback, num_objectives, num_runs, model_name, temperature):
    """Regenerate learning objectives based on user feedback.

    Appends the feedback to the previously processed file contents, reruns the
    multi-run objective generation, then groups the results and adds incorrect
    answers to the best-in-group objectives.

    Args:
        objectives_json: Current objectives JSON (returned unchanged on error).
        feedback: Free-text feedback about the previous objectives.
        num_objectives: Objectives requested per run.
        num_runs: Number of generation runs to pool.
        model_name: Model used for generation (also used for incorrect answers).
        temperature: Sampling temperature (coerced to float).

    Returns:
        Tuple of (status message, grouped objectives JSON, best-in-group JSON).
        On validation failure or exception, ``objectives_json`` is echoed back
        for both JSON slots so the UI keeps showing the previous results.
    """
    if not get_processed_contents():
        return "No processed content available. Please upload files first.", objectives_json, objectives_json
    if not os.getenv("OPENAI_API_KEY"):
        return "OpenAI API key not found.", objectives_json, objectives_json
    if not feedback:
        return "Please provide feedback to regenerate learning objectives.", objectives_json, objectives_json
    run_manager = get_run_manager()
    # Append the feedback as an extra "document" so the generator sees it
    # alongside the original course content.
    file_contents_with_feedback = get_processed_contents().copy()
    file_contents_with_feedback.append(f"FEEDBACK ON PREVIOUS OBJECTIVES: {feedback}")
    quiz_generator = QuizGenerator(
        api_key=os.getenv("OPENAI_API_KEY"),
        model=model_name,
        temperature=float(temperature)
    )
    try:
        # Generate multiple runs of learning objectives with feedback.
        # BUG FIX: the required run_manager argument was previously omitted,
        # which raised a TypeError on every call.
        all_learning_objectives = _generate_multiple_runs(
            quiz_generator,
            file_contents_with_feedback,
            num_objectives,
            num_runs,
            model_name,  # Use the same model for incorrect answer suggestions
            run_manager
        )
        # Group and rank the objectives.
        # BUG FIX: this previously referenced the undefined name
        # `all_base_learning_objectives` (NameError) and omitted run_manager,
        # which the helper dereferences unconditionally.
        grouping_result = _group_base_objectives_add_incorrect_answers(
            quiz_generator, all_learning_objectives, file_contents_with_feedback, model_name, run_manager
        )
        grouped_objectives = grouping_result["all_grouped"]
        best_in_group_objectives = grouping_result["best_in_group"]
        # Serialize for the UI's JSON panels
        grouped_objectives_json = json.dumps([obj.dict() for obj in grouped_objectives])
        best_in_group_json = json.dumps([obj.dict() for obj in best_in_group_objectives])
        return f"Generated {len(all_learning_objectives)} learning objectives, {len(best_in_group_objectives)} unique after grouping.", grouped_objectives_json, best_in_group_json
    except Exception as e:
        print(f"Error regenerating learning objectives: {e}")
        import traceback
        traceback.print_exc()
        return f"Error regenerating learning objectives: {str(e)}", objectives_json, objectives_json
def _extract_file_paths(files):
"""Extract file paths from different input formats."""
file_paths = []
if isinstance(files, list):
for file in files:
if file and os.path.exists(file):
file_paths.append(file)
elif isinstance(files, str) and os.path.exists(files):
file_paths.append(files)
elif hasattr(files, 'name') and os.path.exists(files.name):
file_paths.append(files.name)
return file_paths
def _generate_multiple_runs(quiz_generator, file_contents, num_objectives, num_runs, incorrect_answer_model_name, run_manager):
"""Generate learning objectives across multiple runs."""
all_learning_objectives = []
num_runs_int = int(num_runs)
for run in range(num_runs_int):
run_manager.log(f"Starting generation run {run+1}/{num_runs_int}", level="INFO")
# Generate base learning objectives without grouping or incorrect answers
learning_objectives = quiz_generator.generate_base_learning_objectives(
file_contents, num_objectives, incorrect_answer_model_name
)
run_manager.log(f"Generated {len(learning_objectives)} learning objectives in run {run+1}", level="INFO")
# Assign temporary IDs
for i, obj in enumerate(learning_objectives):
obj.id = 1000 * (run + 1) + (i + 1)
all_learning_objectives.extend(learning_objectives)
run_manager.log(f"Total learning objectives from all runs: {len(all_learning_objectives)}", level="INFO")
return all_learning_objectives
def _group_base_objectives_add_incorrect_answers(quiz_generator, all_base_learning_objectives, file_contents, incorrect_answer_model_name=None, run_manager=None):
    """Group base learning objectives and add incorrect answers to best-in-group objectives.

    Workflow:
        1. Group near-duplicate objectives; pick a best-in-group per group.
        2. Reassign IDs so the primary objective becomes ID=1.
        3. Generate incorrect answer options for best-in-group objectives only.
        4. Iteratively improve those incorrect answers.
        5. Merge: best-in-group objectives keep their enhanced form; all
           others are rebuilt with an empty incorrect-answer list.

    Args:
        quiz_generator: Configured QuizGenerator instance.
        all_base_learning_objectives: Pooled objectives from all runs.
        file_contents: Extracted course-material texts.
        incorrect_answer_model_name: Optional model override for incorrect answers.
        run_manager: Run manager used for logging; defaults to the global one.

    Returns:
        Dict with keys "all_grouped" (every objective, enhanced or stripped)
        and "best_in_group" (enhanced best-in-group objectives only).
    """
    # BUG FIX: run_manager defaulted to None but was used unconditionally,
    # crashing any caller that omitted it; fall back to the global run manager.
    if run_manager is None:
        run_manager = get_run_manager()
    run_manager.log("Grouping base learning objectives...", level="INFO")
    grouping_result = quiz_generator.group_base_learning_objectives(all_base_learning_objectives, file_contents)
    grouped_objectives = grouping_result["all_grouped"]
    best_in_group_objectives = grouping_result["best_in_group"]
    run_manager.log(f"Grouped into {len(best_in_group_objectives)} best-in-group objectives", level="INFO")
    # Find and reassign the best first objective to ID=1 (mutates in place)
    _reassign_objective_ids(grouped_objectives, run_manager)
    # Step 1: Generate incorrect answer suggestions only for best-in-group objectives
    run_manager.log("Generating incorrect answer options only for best-in-group objectives...", level="INFO")
    enhanced_best_in_group = quiz_generator.generate_lo_incorrect_answer_options(
        file_contents, best_in_group_objectives, incorrect_answer_model_name
    )
    run_manager.log("Generated incorrect answer options", level="INFO")
    # Reset the debug directory used by incorrect-answer regeneration logs
    debug_dir = os.path.join("incorrect_suggestion_debug")
    if os.path.exists(debug_dir):
        shutil.rmtree(debug_dir)
    os.makedirs(debug_dir, exist_ok=True)
    # Step 2: Run the improvement workflow on the generated incorrect answers
    run_manager.log("Improving incorrect answer options for best-in-group objectives...", level="INFO")
    improved_best_in_group = quiz_generator.learning_objective_generator.regenerate_incorrect_answers(
        enhanced_best_in_group, file_contents
    )
    run_manager.log("Completed improvement of incorrect answer options", level="INFO")
    # Map enhanced best-in-group objectives by ID for O(1) lookup during merge
    best_in_group_map = {obj.id: obj for obj in improved_best_in_group}
    final_grouped_objectives = []
    for grouped_obj in grouped_objectives:
        if getattr(grouped_obj, "best_in_group", False):
            # Use the enhanced version (with incorrect answers) when available
            if grouped_obj.id in best_in_group_map:
                final_grouped_objectives.append(best_in_group_map[grouped_obj.id])
            else:
                # This shouldn't happen, but just in case
                final_grouped_objectives.append(grouped_obj)
        else:
            # Non-best-in-group objectives carry no incorrect answers
            final_grouped_objectives.append(LearningObjective(
                id=grouped_obj.id,
                learning_objective=grouped_obj.learning_objective,
                source_reference=grouped_obj.source_reference,
                correct_answer=grouped_obj.correct_answer,
                incorrect_answer_options=[],  # Empty list for non-best-in-group
                in_group=getattr(grouped_obj, 'in_group', None),
                group_members=getattr(grouped_obj, 'group_members', None),
                best_in_group=getattr(grouped_obj, 'best_in_group', None)
            ))
    return {
        "all_grouped": final_grouped_objectives,
        "best_in_group": improved_best_in_group
    }
def _reassign_objective_ids(grouped_objectives, run_manager):
"""Reassign IDs to ensure best first objective gets ID=1."""
# Find best first objective
best_first_objective = None
# First identify all groups containing objectives with IDs ending in 001
groups_with_001 = {}
for obj in grouped_objectives:
if obj.id % 1000 == 1: # ID ends in 001
group_members = getattr(obj, "group_members", [obj.id])
for member_id in group_members:
if member_id not in groups_with_001:
groups_with_001[member_id] = True
# Now find the best_in_group objective from these groups
for obj in grouped_objectives:
obj_id = getattr(obj, "id", 0)
group_members = getattr(obj, "group_members", [obj_id])
# Check if this objective is in a group with 001 objectives
is_in_001_group = any(member_id in groups_with_001 for member_id in group_members)
if is_in_001_group and getattr(obj, "best_in_group", False):
best_first_objective = obj
run_manager.log(f"Found best_in_group objective in a 001 group with ID={obj.id}", level="DEBUG")
break
# If no best_in_group from 001 groups found, fall back to the first 001 objective
if not best_first_objective:
for obj in grouped_objectives:
if obj.id % 1000 == 1: # First objective from a run
best_first_objective = obj
run_manager.log(f"No best_in_group from 001 groups found, using first 001 with ID={obj.id}", level="DEBUG")
break
# Reassign IDs
id_counter = 2
if best_first_objective:
best_first_objective.id = 1
run_manager.log(f"Reassigned primary objective to ID=1", level="INFO")
for obj in grouped_objectives:
if obj is best_first_objective:
continue
obj.id = id_counter
id_counter += 1
def _format_objective_results(grouped_result, all_learning_objectives, num_objectives, run_manager):
"""Format objective results for display."""
sorted_best_in_group = sorted(grouped_result["best_in_group"], key=lambda obj: obj.id)
sorted_all_grouped = sorted(grouped_result["all_grouped"], key=lambda obj: obj.id)
# Limit best-in-group to the requested number of objectives
sorted_best_in_group = sorted_best_in_group[:num_objectives]
run_manager.log("Formatting objective results for display", level="INFO")
run_manager.log(f"Best-in-group objectives limited to top {len(sorted_best_in_group)} (requested: {num_objectives})", level="INFO")
# Format best-in-group
formatted_best_in_group = []
for obj in sorted_best_in_group:
formatted_best_in_group.append({
"id": obj.id,
"learning_objective": obj.learning_objective,
"source_reference": obj.source_reference,
"correct_answer": obj.correct_answer,
"incorrect_answer_options": getattr(obj, 'incorrect_answer_options', None),
"in_group": getattr(obj, 'in_group', None),
"group_members": getattr(obj, 'group_members', None),
"best_in_group": getattr(obj, 'best_in_group', None)
})
# Format grouped
formatted_grouped = []
for obj in sorted_all_grouped:
formatted_grouped.append({
"id": obj.id,
"learning_objective": obj.learning_objective,
"source_reference": obj.source_reference,
"correct_answer": obj.correct_answer,
"incorrect_answer_options": getattr(obj, 'incorrect_answer_options', None),
"in_group": getattr(obj, 'in_group', None),
"group_members": getattr(obj, 'group_members', None),
"best_in_group": getattr(obj, 'best_in_group', None)
})
# Format unranked
formatted_unranked = []
for obj in all_learning_objectives:
formatted_unranked.append({
"id": obj.id,
"learning_objective": obj.learning_objective,
"source_reference": obj.source_reference,
"correct_answer": obj.correct_answer
})
run_manager.log(f"Formatted {len(formatted_best_in_group)} best-in-group, {len(formatted_grouped)} grouped, {len(formatted_unranked)} raw objectives", level="INFO")
return (
f"Generated and grouped {len(formatted_best_in_group)} unique learning objectives successfully. Saved to run: {run_manager.get_current_run_id()}",
json.dumps(formatted_best_in_group, indent=2),
json.dumps(formatted_grouped, indent=2),
json.dumps(formatted_unranked, indent=2)
)
def parse_user_learning_objectives(text: str) -> List[str]:
    """
    Parse user-entered learning objectives text into a list of clean objective strings.

    Handles common label formats:
    - Numbered: "1. Objective"  "2) Objective"  "3: Objective"
    - Lettered: "a. Objective"  "b) Objective"  "c: Objective"
    - Plain:    "Objective" (no label)

    Trailing punctuation is preserved as it may be part of the sentence.
    """
    # A label is a number or single letter followed by '.', ')' or ':' and whitespace.
    label = re.compile(r'^(\d+|[a-zA-Z])[\.\)\:]\s+')
    results: List[str] = []
    for raw_line in text.strip().split('\n'):
        candidate = raw_line.strip()
        if not candidate:
            continue
        without_label = label.sub('', candidate)
        if without_label:
            results.append(without_label)
    return results
def process_user_objectives(files, user_objectives_text, model_name, incorrect_answer_model_name, temperature):
    """
    Process user-provided learning objectives using uploaded course materials.

    Pipeline:
    1. Parse objective texts from the user's input
    2. Find source references in course materials for each objective
    3. Generate a correct answer for each objective (same function as auto-generate flow)
    4. Generate incorrect answer options (all objectives are treated as best-in-group)
    5. Improve incorrect answer options iteratively
    6. Return output in the same format as the auto-generate flow

    Args:
        files: Uploaded course-material files (list, path string, or file object).
        user_objectives_text: Raw multi-line objectives text typed by the user.
        model_name: Model used for source-finding and correct answers.
        incorrect_answer_model_name: Model used for incorrect-answer generation.
        temperature: Sampling temperature (coerced to float).

    Returns:
        4-tuple ``(status, best_in_group_json, grouped_json, raw_json)``;
        JSON slots are None when validation fails.
    """
    run_manager = get_run_manager()
    # --- Input validation ---
    if not files:
        return "Please upload at least one file.", None, None, None
    if not user_objectives_text or not user_objectives_text.strip():
        return "Please enter at least one learning objective.", None, None, None
    if not os.getenv("OPENAI_API_KEY"):
        return "OpenAI API key not found. Please set the OPENAI_API_KEY environment variable.", None, None, None
    file_paths = _extract_file_paths(files)
    if not file_paths:
        return "No valid files found. Please upload valid .ipynb, .vtt, .srt, or .md files.", None, None, None
    objective_texts = parse_user_learning_objectives(user_objectives_text)
    if not objective_texts:
        return "No valid learning objectives found. Please enter at least one objective.", None, None, None
    # --- Start run (one logical generation run for user-provided objectives) ---
    run_manager.start_objective_run(
        files=file_paths,
        num_objectives=len(objective_texts),
        num_runs=1,
        model=model_name,
        incorrect_answer_model=incorrect_answer_model_name,
        temperature=temperature
    )
    run_manager.log(f"Processing {len(objective_texts)} user-provided learning objectives", level="INFO")
    # --- Process course material files ---
    processor = ContentProcessor()
    file_contents = processor.process_files(file_paths)
    if not file_contents:
        run_manager.log("No content extracted from the uploaded files", level="ERROR")
        return "No content extracted from the uploaded files.", None, None, None
    run_manager.log(f"Successfully extracted content from {len(file_contents)} files", level="INFO")
    # Stash extracted contents in module state for downstream flows
    set_processed_contents(file_contents)
    quiz_generator = QuizGenerator(
        api_key=os.getenv("OPENAI_API_KEY"),
        model=model_name,
        temperature=float(temperature)
    )
    # --- Step 1: Find source references in course materials ---
    run_manager.log("Finding source references for user-provided objectives...", level="INFO")
    # NOTE(review): imported locally — presumably to avoid a circular import at
    # module load time; confirm before hoisting to the top of the file.
    from learning_objective_generator.base_generation import (
        find_sources_for_user_objectives,
        generate_correct_answers_for_objectives
    )
    objectives_without_answers = find_sources_for_user_objectives(
        quiz_generator.client, model_name, float(temperature), file_contents, objective_texts
    )
    run_manager.log(f"Found sources for {len(objectives_without_answers)} objectives", level="INFO")
    # --- Step 2: Generate correct answers ---
    run_manager.log("Generating correct answers for user-provided objectives...", level="INFO")
    base_objectives = generate_correct_answers_for_objectives(
        quiz_generator.client, model_name, float(temperature), file_contents, objectives_without_answers
    )
    run_manager.log(f"Generated correct answers for {len(base_objectives)} objectives", level="INFO")
    # --- Step 3: Generate incorrect answer options ---
    run_manager.log("Generating incorrect answer options...", level="INFO")
    # Reset the debug directory used by incorrect-answer regeneration logs
    debug_dir = os.path.join("incorrect_suggestion_debug")
    if os.path.exists(debug_dir):
        shutil.rmtree(debug_dir)
    os.makedirs(debug_dir, exist_ok=True)
    enhanced_objectives = quiz_generator.generate_lo_incorrect_answer_options(
        file_contents, base_objectives, incorrect_answer_model_name
    )
    run_manager.log("Generated incorrect answer options", level="INFO")
    # --- Step 4: Improve incorrect answers iteratively ---
    run_manager.log("Improving incorrect answer options...", level="INFO")
    improved_objectives = quiz_generator.learning_objective_generator.regenerate_incorrect_answers(
        enhanced_objectives, file_contents
    )
    run_manager.log("Completed improvement of incorrect answer options", level="INFO")
    # All user-provided objectives are their own group and all are best-in-group
    for obj in improved_objectives:
        obj.in_group = False
        obj.group_members = [obj.id]
        obj.best_in_group = True
    # Store in module state for the question-generation flow
    set_learning_objectives(improved_objectives)
    # --- Format and return results ---
    formatted_results = _format_user_objective_results(improved_objectives, run_manager)
    params = {
        "files": [os.path.basename(f) for f in file_paths],
        "num_objectives": len(objective_texts),
        "num_runs": 1,
        "model": model_name,
        "incorrect_answer_model": incorrect_answer_model_name,
        "temperature": temperature,
        "source": "user-provided"
    }
    run_manager.save_objectives_outputs(
        best_in_group=formatted_results[1],
        all_grouped=formatted_results[2],
        raw_ungrouped=formatted_results[3],
        params=params
    )
    run_manager.end_run(run_type="Learning Objectives (User-provided)")
    return formatted_results
def _format_user_objective_results(objectives, run_manager):
"""Format user-provided objective results for display (same structure as auto-generated)."""
sorted_objectives = sorted(objectives, key=lambda obj: obj.id)
run_manager.log(f"Formatting {len(sorted_objectives)} user-provided objectives for display", level="INFO")
formatted_best_in_group = []
for obj in sorted_objectives:
formatted_best_in_group.append({
"id": obj.id,
"learning_objective": obj.learning_objective,
"source_reference": obj.source_reference,
"correct_answer": obj.correct_answer,
"incorrect_answer_options": getattr(obj, 'incorrect_answer_options', None),
"in_group": getattr(obj, 'in_group', None),
"group_members": getattr(obj, 'group_members', None),
"best_in_group": getattr(obj, 'best_in_group', None)
})
# Grouped view is identical to best-in-group (no grouping was performed)
formatted_grouped = formatted_best_in_group
# Raw view: base fields only (no incorrect answers), for the debug panel
formatted_unranked = [
{
"id": obj.id,
"learning_objective": obj.learning_objective,
"source_reference": obj.source_reference,
"correct_answer": obj.correct_answer
}
for obj in sorted_objectives
]
return (
f"Processed {len(formatted_best_in_group)} user-provided learning objectives successfully. Saved to run: {run_manager.get_current_run_id()}",
json.dumps(formatted_best_in_group, indent=2),
json.dumps(formatted_grouped, indent=2),
json.dumps(formatted_unranked, indent=2)
)
def process_user_objectives_and_generate_questions(files, user_objectives_text, model_name, incorrect_answer_model_name,
                                                   temperature, model_name_q, temperature_q, num_questions, num_runs_q):
    """Process user-provided objectives and then generate questions in one flow.

    Returns an 8-tuple consumed by the UI:
        (combined_status, objectives_output, grouped_output, raw_ungrouped_output,
         status_q, best_questions_output, all_questions_output, formatted_quiz_output)

    When objective processing fails (falsy objectives payload), the question
    slots carry an error message and Nones instead of results.
    """
    obj_results = process_user_objectives(files, user_objectives_text, model_name, incorrect_answer_model_name, temperature)
    status_obj, objectives_output, grouped_output, raw_ungrouped_output = obj_results
    # A falsy objectives payload means processing failed; skip question generation.
    # (Redundant `or objectives_output is None` removed: `not x` covers None.)
    if not objectives_output:
        return (
            status_obj, objectives_output, grouped_output, raw_ungrouped_output,
            "Learning objectives processing failed. Cannot proceed with questions.",
            None, None, None
        )
    status_q, best_questions_output, all_questions_output, formatted_quiz_output = generate_questions(
        objectives_output, model_name_q, temperature_q, num_questions, num_runs_q
    )
    return (
        f"{status_obj}\n\nThen:\n{status_q}",
        objectives_output, grouped_output, raw_ungrouped_output,
        status_q, best_questions_output, all_questions_output, formatted_quiz_output
    )
def process_files_and_generate_questions(files, num_objectives, num_runs, model_name, incorrect_answer_model_name,
                                         temperature, model_name_q, temperature_q, num_questions, num_runs_q):
    """Process files, generate learning objectives, and then generate questions in one flow.

    Returns an 8-tuple consumed by the UI:
        (combined_status, objectives_output, grouped_output, raw_ungrouped_output,
         status_q, best_questions_output, all_questions_output, formatted_quiz_output)

    When objective generation fails (falsy objectives payload), the question
    slots carry an error message and Nones instead of results.
    """
    # First, generate learning objectives:
    # (status, objectives_output, grouped_output, raw_ungrouped_output)
    status_obj, objectives_output, grouped_output, raw_ungrouped_output = process_files(
        files, num_objectives, num_runs, model_name, incorrect_answer_model_name, temperature
    )
    # A falsy objectives payload means generation failed; skip questions.
    # (Redundant `or objectives_output is None` removed: `not x` covers None.)
    if not objectives_output:
        return (
            status_obj,                 # status_output
            objectives_output,          # objectives_output
            grouped_output,             # grouped_output
            raw_ungrouped_output,       # raw_ungrouped_output
            "Learning objectives generation failed. Cannot proceed with questions.",  # status_q_output
            None,                       # best_questions_output
            None,                       # all_questions_output
            None                        # formatted_quiz_output
        )
    # Now generate questions using the objectives:
    # (status_q, best_questions_output, all_questions_output, formatted_quiz_output)
    status_q, best_questions_output, all_questions_output, formatted_quiz_output = generate_questions(
        objectives_output, model_name_q, temperature_q, num_questions, num_runs_q
    )
    # Combine both stage statuses into one message for the UI
    combined_status = f"{status_obj}\n\nThen:\n{status_q}"
    return (
        combined_status,                # status_output
        objectives_output,              # objectives_output
        grouped_output,                 # grouped_output
        raw_ungrouped_output,           # raw_ungrouped_output
        status_q,                       # status_q_output
        best_questions_output,          # best_questions_output
        all_questions_output,           # all_questions_output
        formatted_quiz_output           # formatted_quiz_output
    )