Spaces:

DeepLearningAI
/

quiz-generator-v3

Sleeping

App Files Files Community

quiz-generator-v3 / learning_objective_generator /grouping_and_ranking.py

ecuartasm

Initial commit: AI Course Assessment Generator

217abc3 about 2 months ago

raw

history blame contribute delete

17.4 kB

	from typing import List, Dict, Any
	from openai import OpenAI
	import json
	from models import LearningObjective, BaseLearningObjective, GroupedLearningObjectivesResponse, GroupedBaseLearningObjectivesResponse
	from prompts.learning_objectives import BASE_LEARNING_OBJECTIVES_PROMPT, BLOOMS_TAXONOMY_LEVELS, LEARNING_OBJECTIVE_EXAMPLES


	def group_learning_objectives(client: OpenAI, model: str, temperature: float, learning_objectives: List[LearningObjective], file_contents: List[str]) -> dict:
	    """Group similar learning objectives with the LLM and pick the best of each group.

	    Args:
	        client: OpenAI client; only ``client.beta.chat.completions.parse`` is used.
	        model: Accepted for interface compatibility. NOTE(review): currently unused,
	            the request below hard-codes "gpt-5-mini" -- confirm this is intentional.
	        temperature: Accepted for interface compatibility; not sent with the request.
	        learning_objectives: Objectives to group. ``id``, ``learning_objective`` and
	            ``source_reference`` are read from each one; ``correct_answer`` and
	            ``incorrect_answer_options`` are read when present.
	        file_contents: Course content strings, joined with blank lines and embedded
	            in the prompt as context.

	    Returns:
	        A dict with two keys:
	        ``"all_grouped"``   -- every objective returned by the model, and
	        ``"best_in_group"`` -- the subset whose ``best_in_group`` flag is true.
	        For empty input both keys hold the (empty) input. If anything fails, the
	        error is printed and the input objectives are returned under
	        ``"all_grouped"`` with ``get_best_in_group_objectives`` applied to them for
	        ``"best_in_group"``.
	    """
	    try:
	        print(f"Grouping {len(learning_objectives)} learning objectives")

	        if not learning_objectives:
	            # Nothing to rank. Return the same dict shape as every other path so
	            # callers can always index the result by key.
	            return {"all_grouped": learning_objectives, "best_in_group": learning_objectives}

	        # Course material gives the model the context needed to judge topic overlap.
	        combined_content = "\n\n".join(file_contents)

	        # One text record per objective for the prompt.
	        # NOTE(review): this reads 'incorrect_answer_options' while the schema shown
	        # in the prompt names the field 'incorrect_answer_suggestions' -- confirm
	        # which attribute the objective model actually exposes.
	        objectives_display = "\n".join([f"ID: {obj.id}\nLearning Objective: {obj.learning_objective}\nSource: {obj.source_reference}\nCorrect Answer: {getattr(obj, 'correct_answer', '')}\nIncorrect Answer Options: {json.dumps(getattr(obj, 'incorrect_answer_options', []))}\n" for obj in learning_objectives])

	        # The grouping prompt re-states the generation prompt so the model applies
	        # the same quality criteria when choosing the best objective of each group.
	        ranking_prompt = f"""
	The generation prompt below was used to generate the learning objectives and now your job is to group and determine the best in the group. Group according
	to topic overlap, and select the best in the group according to the criteria in the generation prompt.


	Here's the generation prompt:

	<generation prompt>

	You are an expert educational content creator specializing in creating precise, relevant learning objectives from course materials.

	{BASE_LEARNING_OBJECTIVES_PROMPT}

	<BloomsTaxonomyLevels>
	{BLOOMS_TAXONOMY_LEVELS}
	</BloomsTaxonomyLevels>

	Here is an example of high quality learning objectives:
	<learning objectives>
	{LEARNING_OBJECTIVE_EXAMPLES}
	</learning objectives>

	Use the below course content to assess topic overlap. The source references are embedded in xml tags within the context.
	<course content>
	{combined_content}
	</course content>

	</generation prompt>

	The learning objectives below were generated based on the content and criteria in the generation prompt above. Now your task is to group these learning objectives
	based on how well they meet the criteria described in the generation prompt above.

	IMPORTANT GROUPING INSTRUCTIONS:
	1. Group learning objectives by similarity, including those that cover the same foundational concept.
	2. Return a JSON array with each objective's original ID and its group information ("in_group": bool, "group_members": list[int], "best_in_group": bool). See example below.
	3. Consider clarity, specificity, alignment with the course content, and how well each objective follows the criteria in the generation prompt.
	4. Identify groups of similar learning objectives that cover essentially the same concept or knowledge area.
	5. For each objective, indicate whether it belongs to a group of similar objectives by setting "in_group" to true or false.
	6. For objectives that are part of a group, include a "group_members" field with a list of all IDs in that group (including the objective itself). If an objective is not part of a group, set "group_members" to a list containing only the objective's ID.
	7. For each objective, add a boolean field "best_in_group": set this to true for the highest-quality objective in each group, and false for all others in the group. For objectives not in a group, set "best_in_group" to true by default.
	8. SPECIAL INSTRUCTION: All objectives with IDs ending in 1 (like 1001, 2001, etc.) are the first objectives from different generation runs. Group ALL of these together and mark the best one as "best_in_group": true. This is critical for ensuring one of these objectives is selected as the primary objective:
	a. Group ALL objectives with IDs ending in 1 together in the SAME group.
	b. Evaluate these objectives carefully and select the SINGLE best one based on clarity, specificity, and alignment with course content.
	c. Mark ONLY the best one with "best_in_group": true and all others with "best_in_group": false.
	d. This objective will later be assigned ID=1 and will serve as the primary objective, so choose the highest quality one.
	e. If you find other objectives that cover the same concept but don't have IDs ending in 1, include them in this group but do NOT mark them as best_in_group.
	Here are the learning objectives to group:

	<learning objectives>
	{objectives_display}
	</learning objectives>

	Return your grouped learning objectives as a JSON array in this format. Each objective must include ALL of the following fields:
	[
	{{
	"id": int,
	"learning_objective": str,
	"source_reference": list[str] or str,
	"correct_answer": str,
	"incorrect_answer_suggestions": list[str],
	"in_group": bool,
	"group_members": list[int],
	"best_in_group": bool
	}},
	...
	]
	Example:
	[
	{{
	"id": 3,
	"learning_objective": "Describe the main applications of AI agents.",
	"source_reference": ["sc-Arize-C1-L3-eng.vtt"],
	"correct_answer": "AI agents are used for automation, decision-making, and information retrieval.",
	"incorrect_answer_suggestions": [
	"AI agents are used for automation and data analysis",
	"AI agents are designed for information retrieval and prediction",
	"AI agents are specialized for either automation or decision-making",
	],
	"in_group": true,
	"group_members": [3, 5, 7],
	"best_in_group": true
	}}
	]
	"""

	        # Structured-output request: the SDK parses the reply into
	        # GroupedLearningObjectivesResponse.
	        params = {
	            "model": "gpt-5-mini",
	            "messages": [
	                {"role": "system", "content": "You are an expert educational content evaluator."},
	                {"role": "user", "content": ranking_prompt}
	            ],
	            "response_format": GroupedLearningObjectivesResponse
	        }

	        completion = client.beta.chat.completions.parse(**params)
	        grouped_results = completion.choices[0].message.parsed.grouped_objectives
	        print(f"Received {len(grouped_results)} grouped results")

	        final_objectives = list(grouped_results)

	        # The helper coerces every best_in_group flag to a real bool in place and
	        # returns only the objectives whose flag is True.
	        best_in_group_objectives = get_best_in_group_objectives(final_objectives)

	        return {
	            "all_grouped": final_objectives,
	            "best_in_group": best_in_group_objectives
	        }

	    except Exception as e:
	        # Best-effort fallback: hand back the ungrouped input instead of failing
	        # the whole pipeline.
	        print(f"Error ranking learning objectives: {e}")
	        return {"all_grouped": learning_objectives, "best_in_group": get_best_in_group_objectives(learning_objectives)}

	def get_best_in_group_objectives(grouped_objectives: list) -> list:
	    """Coerce each objective's ``best_in_group`` flag to a bool and keep the winners.

	    Every object in ``grouped_objectives`` has its ``best_in_group`` attribute
	    rewritten in place: the string "true" (any casing) becomes True, any other
	    string becomes False, bools/ints go through ``bool()``, and anything else
	    (including a missing attribute or None) becomes False.

	    Returns:
	        A new list holding, in input order, the objects whose flag is True.
	    """
	    winners = []
	    for objective in grouped_objectives:
	        flag = getattr(objective, "best_in_group", False)
	        if isinstance(flag, str):
	            flag = flag.lower() == "true"
	        elif isinstance(flag, (bool, int)):
	            flag = bool(flag)
	        else:
	            flag = False
	        # Write the normalized value back so later readers see a real bool.
	        objective.best_in_group = flag
	        if objective.best_in_group is True:
	            winners.append(objective)
	    return winners


	def group_base_learning_objectives(client: OpenAI, model: str, temperature: float, base_objectives: List[BaseLearningObjective], file_contents: List[str]) -> Dict[str, List]:
	    """Group base learning objectives (no incorrect answer options) via the LLM.

	    Args:
	        client: OpenAI client; only ``client.beta.chat.completions.parse`` is used.
	        model: Not used by this function; the request hard-codes "gpt-5-mini".
	        temperature: Not used by this function; it is not sent with the request.
	        base_objectives: Objectives to group. ``id``, ``learning_objective`` and
	            ``source_reference`` are read from each; ``correct_answer`` if present.
	        file_contents: Course content strings, joined with blank lines and embedded
	            in the prompt.

	    Returns:
	        A dict with ``"all_grouped"`` (every objective returned by the model) and
	        ``"best_in_group"`` (those whose ``best_in_group`` flag is True). Empty
	        input is returned unchanged under both keys. On any error the input
	        objectives are each marked as an ungrouped best-in-group entry and
	        returned under both keys.
	    """

	    def _mark_all_as_best(error):
	        # Shared fallback: report the error and treat every input objective as
	        # its own single-member group.
	        print(f"Error grouping base learning objectives: {error}")
	        for objective in base_objectives:
	            objective.in_group = False
	            objective.group_members = [objective.id]
	            objective.best_in_group = True
	        return {"all_grouped": base_objectives, "best_in_group": base_objectives}

	    try:
	        print(f"Grouping {len(base_objectives)} base learning objectives")

	        if not base_objectives:
	            # Nothing to group.
	            return {"all_grouped": base_objectives, "best_in_group": base_objectives}

	        # Course material used as context inside the prompt.
	        combined_content = "\n\n".join(file_contents)

	        # One text record per objective for the prompt.
	        objectives_display = "\n".join(
	            f"ID: {obj.id}\nLearning Objective: {obj.learning_objective}\nSource: {obj.source_reference}\nCorrect Answer: {getattr(obj, 'correct_answer', '')}\n"
	            for obj in base_objectives
	        )

	        # The grouping prompt embeds the generation prompt so the same criteria
	        # are applied when choosing the best objective of each group.
	        grouping_prompt = f"""
	The generation prompt below was used to generate the learning objectives and now your job is to group and determine the best in the group. Group according
	to topic overlap, and select the best in the group according to the criteria in the generation prompt.


	Here's the generation prompt:

	<generation prompt>

	You are an expert educational content creator specializing in creating precise, relevant learning objectives from course materials.

	{BASE_LEARNING_OBJECTIVES_PROMPT}

	<BloomsTaxonomyLevels>
	{BLOOMS_TAXONOMY_LEVELS}
	</BloomsTaxonomyLevels>

	Here is an example of high quality learning objectives:
	<learning objectives>
	{LEARNING_OBJECTIVE_EXAMPLES}
	</learning objectives>

	Below is the course content. The source references are embedded in xml tags within the context.
	<course content>
	{combined_content}
	</course content>

	</generation prompt>

	The learning objectives below were generated based on the content and criteria in the generation prompt above. Now your task is to group these learning objectives
	based on how well they meet the criteria described in the generation prompt above.

	IMPORTANT GROUPING INSTRUCTIONS:
	1. Group learning objectives by similarity, including those that cover the same foundational concept.
	2. Return a JSON array with each objective's original ID and its group information ("in_group": bool, "group_members": list[int], "best_in_group": bool). See example below.
	3. Consider clarity, specificity, alignment with the course content, and how well each objective follows the criteria in the generation prompt.
	4. Identify groups of similar learning objectives that cover essentially the same concept or knowledge area.
	5. For each objective, indicate whether it belongs to a group of similar objectives by setting "in_group" to true or false.
	6. For objectives that are part of a group, include a "group_members" field with a list of all IDs in that group (including the objective itself). If an objective is not part of a group, set "group_members" to a list containing only the objective's ID.
	7. For each objective, add a boolean field "best_in_group": set this to true for the highest-quality objective in each group, and false for all others in the group. For objectives not in a group, set "best_in_group" to true by default.
	8. SPECIAL INSTRUCTION: All objectives with IDs ending in 1 (like 1001, 2001, etc.) are the first objectives from different generation runs. Group ALL of these together and mark the best one as "best_in_group": true. This is critical for ensuring one of these objectives is selected as the primary objective:
	a. Group ALL objectives with IDs ending in 1 together in the SAME group.
	b. Evaluate these objectives carefully and select the SINGLE best one based on clarity, specificity, and alignment with course content.
	c. Mark ONLY the best one with "best_in_group": true and all others with "best_in_group": false.
	d. This objective will later be assigned ID=1 and will serve as the primary objective, so choose the highest quality one.
	e. If you find other objectives that cover the same concept but don't have IDs ending in 1, include them in this group but do NOT mark them as best_in_group.

	Here are the learning objectives to group:
	<learning_objectives>
	{objectives_display}
	</learning_objectives>

	Your response should be a JSON array of objects with this structure:
	[
	{{
	"id": int,
	"learning_objective": str,
	"source_reference": Union[List[str], str],
	"correct_answer": str,
	"in_group": bool,
	"group_members": list[int],
	"best_in_group": bool
	}},
	...
	]
	Example:
	[
	{{
	"id": 3,
	"learning_objective": "Describe the main applications of AI agents.",
	"source_reference": ["sc-Arize-C1-L3-eng.vtt"],
	"correct_answer": "AI agents are used for automation, decision-making, and information retrieval.",
	"in_group": true,
	"group_members": [3, 5, 7],
	"best_in_group": true
	}}
	]
	"""

	        # Structured-output request: the SDK parses the reply into
	        # GroupedBaseLearningObjectivesResponse.
	        try:
	            request = {
	                "model": "gpt-5-mini",
	                "messages": [
	                    {"role": "system", "content": "You are an expert educational content evaluator."},
	                    {"role": "user", "content": grouping_prompt}
	                ],
	                "response_format": GroupedBaseLearningObjectivesResponse
	            }

	            completion = client.beta.chat.completions.parse(**request)
	            grouped_results = completion.choices[0].message.parsed.grouped_objectives
	            print(f"Received {len(grouped_results)} grouped results")

	            # Coerce every best_in_group flag to a real Python bool, in place.
	            for item in grouped_results:
	                flag = getattr(item, "best_in_group", False)
	                if isinstance(flag, str):
	                    flag = flag.lower() == "true"
	                elif isinstance(flag, (bool, int)):
	                    flag = bool(flag)
	                else:
	                    flag = False
	                item.best_in_group = flag

	            final_objectives = list(grouped_results)

	            # Keep only the objectives flagged as the best of their group.
	            best_in_group_objectives = [
	                item for item in final_objectives
	                if getattr(item, "best_in_group", False) is True
	            ]

	            return {
	                "all_grouped": final_objectives,
	                "best_in_group": best_in_group_objectives
	            }

	        except Exception as e:
	            return _mark_all_as_best(e)

	    except Exception as e:
	        return _mark_all_as_best(e)