"""Group generated learning objectives by topic and pick the best of each group.

Both public entry points send the objectives plus the course content to the
OpenAI structured-output endpoint and return a dict of the form
``{"all_grouped": [...], "best_in_group": [...]}``.
"""

from typing import List, Dict, Any
from openai import OpenAI
import json

from models import LearningObjective, BaseLearningObjective, GroupedLearningObjectivesResponse, GroupedBaseLearningObjectivesResponse
from prompts.learning_objectives import BASE_LEARNING_OBJECTIVES_PROMPT, BLOOMS_TAXONOMY_LEVELS, LEARNING_OBJECTIVE_EXAMPLES


# Model used for every grouping request.
# NOTE(review): the public functions accept ``model`` and ``temperature`` but
# the request has always used this fixed model and sent no temperature.
# Confirm with callers before wiring the parameters through.
_GROUPING_MODEL = "gpt-5-mini"

_SYSTEM_MESSAGE = "You are an expert educational content evaluator."

# Opening of the grouping prompt; the generation prompt pieces follow it.
_PROMPT_PREAMBLE = """\
The generation prompt below was used to generate the learning objectives and now your job is to group and determine the best in the group. Group according to topic overlap, and select the best in the group according to the criteria in the generation prompt.

Here's the generation prompt:

You are an expert educational content creator specializing in creating precise, relevant learning objectives from course materials."""

# Grouping rules shared verbatim by both prompts.
_GROUPING_INSTRUCTIONS = """\
The learning objectives below were generated based on the content and criteria in the generation prompt above. Now your task is to group these learning objectives based on how well they meet the criteria described in the generation prompt above.

IMPORTANT GROUPING INSTRUCTIONS:
1. Group learning objectives by similarity, including those that cover the same foundational concept.
2. Return a JSON array with each objective's original ID and its group information ("in_group": bool, "group_members": list[int], "best_in_group": bool). See example below.
3. Consider clarity, specificity, alignment with the course content, and how well each objective follows the criteria in the generation prompt.
4. Identify groups of similar learning objectives that cover essentially the same concept or knowledge area.
5. For each objective, indicate whether it belongs to a group of similar objectives by setting "in_group" to true or false.
6. For objectives that are part of a group, include a "group_members" field with a list of all IDs in that group (including the objective itself). If an objective is not part of a group, set "group_members" to a list containing only the objective's ID.
7. For each objective, add a boolean field "best_in_group": set this to true for the highest-quality objective in each group, and false for all others in the group. For objectives not in a group, set "best_in_group" to true by default.
8. SPECIAL INSTRUCTION: All objectives with IDs ending in 1 (like 1001, 2001, etc.) are the first objectives from different generation runs. Group ALL of these together and mark the best one as "best_in_group": true. This is critical for ensuring one of these objectives is selected as the primary objective:
   a. Group ALL objectives with IDs ending in 1 together in the SAME group.
   b. Evaluate these objectives carefully and select the SINGLE best one based on clarity, specificity, and alignment with course content.
   c. Mark ONLY the best one with "best_in_group": true and all others with "best_in_group": false.
   d. This objective will later be assigned ID=1 and will serve as the primary objective, so choose the highest quality one.
   e. If you find other objectives that cover the same concept but don't have IDs ending in 1, include them in this group but do NOT mark them as best_in_group."""

# Output description for objectives that carry incorrect answer options.
_FULL_OUTPUT_FORMAT = """\
Return your grouped learning objectives as a JSON array in this format. Each objective must include ALL of the following fields:
[
    {
        "id": int,
        "learning_objective": str,
        "source_reference": list[str] or str,
        "correct_answer": str,
        "incorrect_answer_suggestions": list[str],
        "in_group": bool,
        "group_members": list[int],
        "best_in_group": bool
    },
    ...
]

Example:
[
    {
        "id": 3,
        "learning_objective": "Describe the main applications of AI agents.",
        "source_reference": ["sc-Arize-C1-L3-eng.vtt"],
        "correct_answer": "AI agents are used for automation, decision-making, and information retrieval.",
        "incorrect_answer_suggestions": [
            "AI agents are used for automation and data analysis",
            "AI agents are designed for information retrieval and prediction",
            "AI agents are specialized for either automation or decision-making"
        ],
        "in_group": true,
        "group_members": [3, 5, 7],
        "best_in_group": true
    }
]"""

# Output description for base objectives (no incorrect answer options).
_BASE_OUTPUT_FORMAT = """\
Your response should be a JSON array of objects with this structure:
[
    {
        "id": int,
        "learning_objective": str,
        "source_reference": Union[List[str], str],
        "correct_answer": str,
        "in_group": bool,
        "group_members": list[int],
        "best_in_group": bool
    },
    ...
]

Example:
[
    {
        "id": 3,
        "learning_objective": "Describe the main applications of AI agents.",
        "source_reference": ["sc-Arize-C1-L3-eng.vtt"],
        "correct_answer": "AI agents are used for automation, decision-making, and information retrieval.",
        "in_group": true,
        "group_members": [3, 5, 7],
        "best_in_group": true
    }
]"""


def _coerce_bool(value: Any) -> bool:
    """Turn a ``best_in_group`` value into a real bool.

    Strings count as true only when they spell "true" (case-insensitive);
    bools and ints use normal truthiness; anything else is False.
    """
    if isinstance(value, str):
        return value.lower() == "true"
    if isinstance(value, (bool, int)):
        return bool(value)
    return False


def _build_grouping_prompt(content_intro: str, combined_content: str,
                           objectives_display: str, output_format: str) -> str:
    """Assemble the user prompt from the shared sections and the per-variant parts."""
    return f"""
{_PROMPT_PREAMBLE}

{BASE_LEARNING_OBJECTIVES_PROMPT}

{BLOOMS_TAXONOMY_LEVELS}

Here is an example of high quality learning objectives:
{LEARNING_OBJECTIVE_EXAMPLES}

{content_intro}
The source references are embedded in xml tags within the context.

{combined_content}

{_GROUPING_INSTRUCTIONS}

Here are the learning objectives to group:
{objectives_display}

{output_format}
"""


def _request_grouping(client: OpenAI, prompt: str, response_format: Any) -> Dict[str, List]:
    """Send the grouping prompt and split the parsed reply into all / best-in-group."""
    # Structured output: the SDK parses the reply into ``response_format``.
    completion = client.beta.chat.completions.parse(
        model=_GROUPING_MODEL,
        messages=[
            {"role": "system", "content": _SYSTEM_MESSAGE},
            {"role": "user", "content": prompt},
        ],
        response_format=response_format,
    )
    grouped_results = completion.choices[0].message.parsed.grouped_objectives
    print(f"Received {len(grouped_results)} grouped results")

    all_grouped = list(grouped_results)
    return {
        "all_grouped": all_grouped,
        "best_in_group": get_best_in_group_objectives(all_grouped),
    }


def group_learning_objectives(client: OpenAI, model: str, temperature: float, learning_objectives: List[LearningObjective], file_contents: List[str]) -> dict:
    """Group learning objectives and return both the full list and the best-in-group list.

    Args:
        client: OpenAI client used for the structured-output request.
        model: Currently unused; the request always uses ``_GROUPING_MODEL``.
        temperature: Currently unused; no temperature is sent.
        learning_objectives: Objectives to group.
        file_contents: Course content chunks, joined with blank lines for context.

    Returns:
        ``{"all_grouped": [...], "best_in_group": [...]}``. For empty input both
        entries are the (empty) input. If anything fails, ``all_grouped`` is the
        unmodified input and ``best_in_group`` holds whichever inputs already
        carry a truthy ``best_in_group`` flag.
    """
    try:
        print(f"Grouping {len(learning_objectives)} learning objectives")

        if not learning_objectives:
            # Nothing to rank; keep the same dict shape as every other path.
            return {"all_grouped": learning_objectives, "best_in_group": learning_objectives}

        combined_content = "\n\n".join(file_contents)
        objectives_display = "\n".join(
            f"ID: {obj.id}\n"
            f"Learning Objective: {obj.learning_objective}\n"
            f"Source: {obj.source_reference}\n"
            f"Correct Answer: {getattr(obj, 'correct_answer', '')}\n"
            f"Incorrect Answer Options: {json.dumps(getattr(obj, 'incorrect_answer_options', []))}\n"
            for obj in learning_objectives
        )
        ranking_prompt = _build_grouping_prompt(
            "Use the below course content to assess topic overlap.",
            combined_content,
            objectives_display,
            _FULL_OUTPUT_FORMAT,
        )
        return _request_grouping(client, ranking_prompt, GroupedLearningObjectivesResponse)
    except Exception as e:
        # Best-effort: report the failure and fall back to the ungrouped input.
        print(f"Error ranking learning objectives: {e}")
        return {"all_grouped": learning_objectives, "best_in_group": get_best_in_group_objectives(learning_objectives)}


def get_best_in_group_objectives(grouped_objectives: list) -> list:
    """Return only the objectives whose ``best_in_group`` flag is true.

    Each object's ``best_in_group`` attribute is rewritten in place as a Python
    bool (missing or unrecognised values become False) before filtering.
    """
    best_in_group_objectives = []
    for obj in grouped_objectives:
        obj.best_in_group = _coerce_bool(getattr(obj, "best_in_group", False))
        if obj.best_in_group is True:
            best_in_group_objectives.append(obj)
    return best_in_group_objectives


def group_base_learning_objectives(client: OpenAI, model: str, temperature: float, base_objectives: List[BaseLearningObjective], file_contents: List[str]) -> Dict[str, List]:
    """Group base learning objectives (without incorrect answer options).

    Args:
        client: OpenAI client used for the structured-output request.
        model: Currently unused; the request always uses ``_GROUPING_MODEL``.
        temperature: Currently unused; no temperature is sent.
        base_objectives: Objectives to group.
        file_contents: Course content chunks, joined with blank lines for context.

    Returns:
        ``{"all_grouped": [...], "best_in_group": [...]}``. For empty input both
        entries are the input. If anything fails, every input objective is
        marked as its own single-member group and as best-in-group, and the
        input list is returned for both entries.
    """
    try:
        print(f"Grouping {len(base_objectives)} base learning objectives")

        if not base_objectives:
            # Nothing to group.
            return {"all_grouped": base_objectives, "best_in_group": base_objectives}

        combined_content = "\n\n".join(file_contents)
        objectives_display = "\n".join(
            f"ID: {obj.id}\n"
            f"Learning Objective: {obj.learning_objective}\n"
            f"Source: {obj.source_reference}\n"
            f"Correct Answer: {getattr(obj, 'correct_answer', '')}\n"
            for obj in base_objectives
        )
        grouping_prompt = _build_grouping_prompt(
            "Below is the course content.",
            combined_content,
            objectives_display,
            _BASE_OUTPUT_FORMAT,
        )
        return _request_grouping(client, grouping_prompt, GroupedBaseLearningObjectivesResponse)
    except Exception as e:
        print(f"Error grouping base learning objectives: {e}")
        # If there's an error, just mark all objectives as best-in-group.
        for obj in base_objectives:
            obj.in_group = False
            obj.group_members = [obj.id]
            obj.best_in_group = True
        return {"all_grouped": base_objectives, "best_in_group": base_objectives}