Spaces:
Sleeping
Sleeping
| from typing import List, Dict, Any | |
| from openai import OpenAI | |
| import json | |
| from models import LearningObjective, BaseLearningObjective, GroupedLearningObjectivesResponse, GroupedBaseLearningObjectivesResponse | |
| from prompts.learning_objectives import BASE_LEARNING_OBJECTIVES_PROMPT, BLOOMS_TAXONOMY_LEVELS, LEARNING_OBJECTIVE_EXAMPLES | |
def group_learning_objectives(client: OpenAI, model: str, temperature: float, learning_objectives: List[LearningObjective], file_contents: List[str]) -> Dict[str, List]:
    """Group similar learning objectives and select the best one in each group.

    Builds a grouping prompt from the generation prompt constants, the course
    content and the candidate objectives, then asks the model for structured
    output parsed into ``GroupedLearningObjectivesResponse``.

    Args:
        client: OpenAI client used for the structured-output request.
        model: Currently unused; the request hard-codes ``"gpt-5-mini"``.
        temperature: Currently unused; no temperature is sent with the request.
        learning_objectives: Candidate objectives to group.
        file_contents: Course content chunks, joined with blank lines and
            embedded in the prompt as context.

    Returns:
        A dict with two keys: ``"all_grouped"`` (every objective returned by
        the model) and ``"best_in_group"`` (only those whose ``best_in_group``
        flag is true). For empty input both lists are empty. If anything
        raises, the error is printed and the original ``learning_objectives``
        are returned under ``"all_grouped"``, with ``"best_in_group"`` computed
        from them by ``get_best_in_group_objectives``.
    """
    try:
        print(f"Grouping {len(learning_objectives)} learning objectives")
        if not learning_objectives:
            # Nothing to rank. Return the same dict shape as every other path
            # so callers can index the result unconditionally.
            return {"all_grouped": [], "best_in_group": []}

        # Create combined content for context
        combined_content = "\n\n".join(file_contents)

        # Format the objectives for display in the prompt
        # NOTE(review): the display reads `incorrect_answer_options` while the
        # prompt's schema/example below names `incorrect_answer_suggestions`;
        # confirm which one GroupedLearningObjectivesResponse declares.
        objectives_display = "\n".join(
            f"ID: {obj.id}\n"
            f"Learning Objective: {obj.learning_objective}\n"
            f"Source: {obj.source_reference}\n"
            f"Correct Answer: {getattr(obj, 'correct_answer', '')}\n"
            f"Incorrect Answer Options: {json.dumps(getattr(obj, 'incorrect_answer_options', []))}\n"
            for obj in learning_objectives
        )

        # Create prompt for ranking using the same context as generation but without duplicating content
        ranking_prompt = f"""
The generation prompt below was used to generate the learning objectives and now your job is to group and determine the best in the group. Group according
to topic overlap, and select the best in the group according to the criteria in the generation prompt.
Here's the generation prompt:
<generation prompt>
You are an expert educational content creator specializing in creating precise, relevant learning objectives from course materials.
{BASE_LEARNING_OBJECTIVES_PROMPT}
<BloomsTaxonomyLevels>
{BLOOMS_TAXONOMY_LEVELS}
</BloomsTaxonomyLevels>
Here is an example of high quality learning objectives:
<learning objectives>
{LEARNING_OBJECTIVE_EXAMPLES}
</learning objectives>
Use the below course content to assess topic overlap. The source references are embedded in xml tags within the context.
<course content>
{combined_content}
</course content>
</generation prompt>
The learning objectives below were generated based on the content and criteria in the generation prompt above. Now your task is to group these learning objectives
based on how well they meet the criteria described in the generation prompt above.
IMPORTANT GROUPING INSTRUCTIONS:
1. Group learning objectives by similarity, including those that cover the same foundational concept.
2. Return a JSON array with each objective's original ID and its group information ("in_group": bool, "group_members": list[int], "best_in_group": bool). See example below.
3. Consider clarity, specificity, alignment with the course content, and how well each objective follows the criteria in the generation prompt.
4. Identify groups of similar learning objectives that cover essentially the same concept or knowledge area.
5. For each objective, indicate whether it belongs to a group of similar objectives by setting "in_group" to true or false.
6. For objectives that are part of a group, include a "group_members" field with a list of all IDs in that group (including the objective itself). If an objective is not part of a group, set "group_members" to a list containing only the objective's ID.
7. For each objective, add a boolean field "best_in_group": set this to true for the highest-quality objective in each group, and false for all others in the group. For objectives not in a group, set "best_in_group" to true by default.
8. SPECIAL INSTRUCTION: All objectives with IDs ending in 1 (like 1001, 2001, etc.) are the first objectives from different generation runs. Group ALL of these together and mark the best one as "best_in_group": true. This is critical for ensuring one of these objectives is selected as the primary objective:
a. Group ALL objectives with IDs ending in 1 together in the SAME group.
b. Evaluate these objectives carefully and select the SINGLE best one based on clarity, specificity, and alignment with course content.
c. Mark ONLY the best one with "best_in_group": true and all others with "best_in_group": false.
d. This objective will later be assigned ID=1 and will serve as the primary objective, so choose the highest quality one.
e. If you find other objectives that cover the same concept but don't have IDs ending in 1, include them in this group but do NOT mark them as best_in_group.
Here are the learning objectives to group:
<learning objectives>
{objectives_display}
</learning objectives>
Return your grouped learning objectives as a JSON array in this format. Each objective must include ALL of the following fields:
[
{{
"id": int,
"learning_objective": str,
"source_reference": list[str] or str,
"correct_answer": str,
"incorrect_answer_suggestions": list[str],
"in_group": bool,
"group_members": list[int],
"best_in_group": bool
}},
...
]
Example:
[
{{
"id": 3,
"learning_objective": "Describe the main applications of AI agents.",
"source_reference": ["sc-Arize-C1-L3-eng.vtt"],
"correct_answer": "AI agents are used for automation, decision-making, and information retrieval.",
"incorrect_answer_suggestions": [
"AI agents are used for automation and data analysis",
"AI agents are designed for information retrieval and prediction",
"AI agents are specialized for either automation or decision-making",
],
"in_group": true,
"group_members": [3, 5, 7],
"best_in_group": true
}}
]
"""

        # Use OpenAI beta API for structured output.
        # NOTE(review): the model name is hard-coded and the `model` /
        # `temperature` arguments are ignored; confirm this is intentional.
        completion = client.beta.chat.completions.parse(
            model="gpt-5-mini",
            messages=[
                {"role": "system", "content": "You are an expert educational content evaluator."},
                {"role": "user", "content": ranking_prompt},
            ],
            response_format=GroupedLearningObjectivesResponse,
        )
        grouped_results = completion.choices[0].message.parsed.grouped_objectives
        print(f"Received {len(grouped_results)} grouped results")

        final_objectives = list(grouped_results)
        # The helper normalizes each `best_in_group` flag to a Python bool and
        # keeps only the objectives whose flag is True.
        return {
            "all_grouped": final_objectives,
            "best_in_group": get_best_in_group_objectives(final_objectives),
        }
    except Exception as e:
        # Best-effort: any failure (prompt building, API call, parsing) falls
        # back to the ungrouped input rather than propagating.
        print(f"Error ranking learning objectives: {e}")
        return {"all_grouped": learning_objectives, "best_in_group": get_best_in_group_objectives(learning_objectives)}
def get_best_in_group_objectives(grouped_objectives: list) -> list:
    """Return the objectives flagged as best in their group.

    Each object's ``best_in_group`` attribute is first rewritten in place as a
    Python bool: the string "true" (any casing) and truthy bool/int values
    become True; everything else, including a missing attribute, becomes
    False. Objects whose flag ends up True are returned in their original
    order. No check on the objective's id is performed.
    """

    def _coerce_flag(raw) -> bool:
        # Strings are compared case-insensitively against "true".
        if isinstance(raw, str):
            return raw.lower() == "true"
        if isinstance(raw, (bool, int)):
            return bool(raw)
        # Any other type (None, float, ...) counts as not-best.
        return False

    winners = []
    for objective in grouped_objectives:
        objective.best_in_group = _coerce_flag(getattr(objective, "best_in_group", False))
        # Read the attribute back so the stored value decides membership.
        if objective.best_in_group is True:
            winners.append(objective)
    return winners
def group_base_learning_objectives(client: OpenAI, model: str, temperature: float, base_objectives: List[BaseLearningObjective], file_contents: List[str]) -> Dict[str, List]:
    """Group base learning objectives (those without incorrect answer options).

    Sends the objectives, together with the generation prompt constants and
    the joined course content, to the model and parses the reply as
    ``GroupedBaseLearningObjectivesResponse``.

    Returns a dict with ``"all_grouped"`` (every objective the model returned)
    and ``"best_in_group"`` (those whose ``best_in_group`` flag is True). Empty
    input is returned unchanged under both keys. On any exception the error is
    printed, every input objective is marked as an ungrouped best-in-group
    entry, and the input list is returned under both keys.

    The ``model`` and ``temperature`` arguments are not used by this function;
    the request names ``"gpt-5-mini"`` directly.
    """

    def _mark_all_as_best(error: Exception) -> Dict[str, List]:
        # Shared fallback: report the error and treat every input objective as
        # its own single-member group.
        print(f"Error grouping base learning objectives: {error}")
        for objective in base_objectives:
            objective.in_group = False
            objective.group_members = [objective.id]
            objective.best_in_group = True
        return {"all_grouped": base_objectives, "best_in_group": base_objectives}

    def _coerce_flag(raw) -> bool:
        # "true" (any casing) and truthy bool/int values count as True.
        if isinstance(raw, str):
            return raw.lower() == "true"
        if isinstance(raw, (bool, int)):
            return bool(raw)
        return False

    try:
        print(f"Grouping {len(base_objectives)} base learning objectives")
        if not base_objectives:
            # Nothing to group
            return {"all_grouped": base_objectives, "best_in_group": base_objectives}

        # Course material that the prompt embeds as context.
        course_text = "\n\n".join(file_contents)

        # One text stanza per objective, for display in the prompt.
        stanzas = []
        for candidate in base_objectives:
            stanzas.append(
                f"ID: {candidate.id}\n"
                f"Learning Objective: {candidate.learning_objective}\n"
                f"Source: {candidate.source_reference}\n"
                f"Correct Answer: {getattr(candidate, 'correct_answer', '')}\n"
            )
        listing = "\n".join(stanzas)

        # Grouping prompt: reuses the generation context so the model judges
        # the objectives against the same criteria they were generated under.
        grouping_prompt = f"""
The generation prompt below was used to generate the learning objectives and now your job is to group and determine the best in the group. Group according
to topic overlap, and select the best in the group according to the criteria in the generation prompt.
Here's the generation prompt:
<generation prompt>
You are an expert educational content creator specializing in creating precise, relevant learning objectives from course materials.
{BASE_LEARNING_OBJECTIVES_PROMPT}
<BloomsTaxonomyLevels>
{BLOOMS_TAXONOMY_LEVELS}
</BloomsTaxonomyLevels>
Here is an example of high quality learning objectives:
<learning objectives>
{LEARNING_OBJECTIVE_EXAMPLES}
</learning objectives>
Below is the course content. The source references are embedded in xml tags within the context.
<course content>
{course_text}
</course content>
</generation prompt>
The learning objectives below were generated based on the content and criteria in the generation prompt above. Now your task is to group these learning objectives
based on how well they meet the criteria described in the generation prompt above.
IMPORTANT GROUPING INSTRUCTIONS:
1. Group learning objectives by similarity, including those that cover the same foundational concept.
2. Return a JSON array with each objective's original ID and its group information ("in_group": bool, "group_members": list[int], "best_in_group": bool). See example below.
3. Consider clarity, specificity, alignment with the course content, and how well each objective follows the criteria in the generation prompt.
4. Identify groups of similar learning objectives that cover essentially the same concept or knowledge area.
5. For each objective, indicate whether it belongs to a group of similar objectives by setting "in_group" to true or false.
6. For objectives that are part of a group, include a "group_members" field with a list of all IDs in that group (including the objective itself). If an objective is not part of a group, set "group_members" to a list containing only the objective's ID.
7. For each objective, add a boolean field "best_in_group": set this to true for the highest-quality objective in each group, and false for all others in the group. For objectives not in a group, set "best_in_group" to true by default.
8. SPECIAL INSTRUCTION: All objectives with IDs ending in 1 (like 1001, 2001, etc.) are the first objectives from different generation runs. Group ALL of these together and mark the best one as "best_in_group": true. This is critical for ensuring one of these objectives is selected as the primary objective:
a. Group ALL objectives with IDs ending in 1 together in the SAME group.
b. Evaluate these objectives carefully and select the SINGLE best one based on clarity, specificity, and alignment with course content.
c. Mark ONLY the best one with "best_in_group": true and all others with "best_in_group": false.
d. This objective will later be assigned ID=1 and will serve as the primary objective, so choose the highest quality one.
e. If you find other objectives that cover the same concept but don't have IDs ending in 1, include them in this group but do NOT mark them as best_in_group.
Here are the learning objectives to group:
<learning_objectives>
{listing}
</learning_objectives>
Your response should be a JSON array of objects with this structure:
[
{{
"id": int,
"learning_objective": str,
"source_reference": Union[List[str], str],
"correct_answer": str,
"in_group": bool,
"group_members": list[int],
"best_in_group": bool
}},
...
]
Example:
[
{{
"id": 3,
"learning_objective": "Describe the main applications of AI agents.",
"source_reference": ["sc-Arize-C1-L3-eng.vtt"],
"correct_answer": "AI agents are used for automation, decision-making, and information retrieval.",
"in_group": true,
"group_members": [3, 5, 7],
"best_in_group": true
}}
]
"""

        # Structured-output request through the OpenAI beta parse endpoint.
        try:
            chat_messages = [
                {"role": "system", "content": "You are an expert educational content evaluator."},
                {"role": "user", "content": grouping_prompt},
            ]
            request_kwargs = {
                "model": "gpt-5-mini",
                "messages": chat_messages,
                "response_format": GroupedBaseLearningObjectivesResponse,
            }
            response = client.beta.chat.completions.parse(**request_kwargs)
            grouped_results = response.choices[0].message.parsed.grouped_objectives
            print(f"Received {len(grouped_results)} grouped results")

            # Rewrite every best_in_group flag as a real Python bool.
            for entry in grouped_results:
                entry.best_in_group = _coerce_flag(getattr(entry, "best_in_group", False))

            everything = list(grouped_results)
            winners = []
            for entry in everything:
                if getattr(entry, "best_in_group", False) is True:
                    winners.append(entry)
            return {"all_grouped": everything, "best_in_group": winners}
        except Exception as request_error:
            # If there's an error, just mark all objectives as best-in-group
            return _mark_all_as_best(request_error)
    except Exception as setup_error:
        # Covers failures before the request (and a failing inner fallback).
        return _mark_all_as_best(setup_error)