Commit ·
421b5c9
1
Parent(s): 2a1cd96
fixed duplicates in grouping
Browse files
- backend/grouping_logic.py +81 -6
backend/grouping_logic.py
CHANGED
|
@@ -180,8 +180,21 @@ CRITICAL RULES:
|
|
| 180 |
✓ Each group needs grade diversity: Mix high (>18) with medium (16-18) performers
|
| 181 |
✓ Prioritize complementary MBTI types over similar types
|
| 182 |
✓ Use provided data fields - DO NOT invent values
|
| 183 |
-
|
| 184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
|
| 186 |
OUTPUT FORMAT (Valid JSON Only):
|
| 187 |
{{
|
|
@@ -207,7 +220,23 @@ OUTPUT FORMAT (Valid JSON Only):
|
|
| 207 |
'messages': [
|
| 208 |
{
|
| 209 |
'role': 'system',
|
| 210 |
-
'content': 'You are a precise algorithmic grouping assistant. You MUST output ONLY valid JSON - no markdown, no code blocks, no extra text. Start directly with { and end with }.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
},
|
| 212 |
{
|
| 213 |
'role': 'user',
|
|
@@ -294,17 +323,52 @@ OUTPUT FORMAT (Valid JSON Only):
|
|
| 294 |
print(f"❌ No JSON found in response. Full content:\n{content}")
|
| 295 |
raise Exception("Invalid JSON from API - no valid JSON structure found")
|
| 296 |
|
| 297 |
-
# Failsafe:
|
| 298 |
assigned_students = set()
|
|
|
|
|
|
|
|
|
|
| 299 |
for group in grouping_result['groups']:
|
| 300 |
if 'students' in group:
|
| 301 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
all_ids = [s.studentNumber for s in students]
|
| 304 |
missing = [id for id in all_ids if id not in assigned_students]
|
| 305 |
|
| 306 |
if missing:
|
| 307 |
-
print(f'AI missed students, adding to last group: {missing}')
|
| 308 |
if grouping_result['groups']:
|
| 309 |
grouping_result['groups'][-1]['students'].extend(missing)
|
| 310 |
grouping_result['groups'][-1]['reasoning'] += f" (سیستم دانشآموزان {', '.join(missing)} را به این گروه اضافه کرد)"
|
|
@@ -315,6 +379,17 @@ OUTPUT FORMAT (Valid JSON Only):
|
|
| 315 |
"reasoning": "گروه بازیابی شده توسط سیستم"
|
| 316 |
})
|
| 317 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
return grouping_result
|
| 319 |
|
| 320 |
async def random_grouping(students: List[Any]) -> Dict[str, Any]:
|
|
|
|
| 180 |
✓ Each group needs grade diversity: Mix high (>18) with medium (16-18) performers
|
| 181 |
✓ Prioritize complementary MBTI types over similar types
|
| 182 |
✓ Use provided data fields - DO NOT invent values
|
| 183 |
+
|
| 184 |
+
🚨 MANDATORY DUPLICATE PREVENTION (HIGHEST PRIORITY) 🚨
|
| 185 |
+
This is a HARD CONSTRAINT, not a guideline:
|
| 186 |
+
✓ Each student ID (S001, S002, etc.) can appear in EXACTLY ONE group
|
| 187 |
+
✓ NO student can be in multiple groups - this would be a CRITICAL ERROR
|
| 188 |
+
✓ Before outputting, verify EVERY student ID appears exactly once
|
| 189 |
+
✓ If you find a duplicate, STOP and fix it immediately
|
| 190 |
+
✓ Total students in all groups MUST equal {total_students}
|
| 191 |
+
|
| 192 |
+
VALIDATION CHECKLIST (complete this mentally before responding):
|
| 193 |
+
□ Step 1: List all student IDs used across all groups
|
| 194 |
+
□ Step 2: Check if any ID appears more than once → if YES, remove duplicates
|
| 195 |
+
□ Step 3: Count total students in groups → must equal {total_students}
|
| 196 |
+
□ Step 4: Check for missing students → add them to appropriate groups
|
| 197 |
+
□ Step 5: Verify no duplicates exist → if duplicates found, START OVER
|
| 198 |
|
| 199 |
OUTPUT FORMAT (Valid JSON Only):
|
| 200 |
{{
|
|
|
|
| 220 |
'messages': [
|
| 221 |
{
|
| 222 |
'role': 'system',
|
| 223 |
+
'content': '''You are a precise algorithmic grouping assistant. You MUST output ONLY valid JSON - no markdown, no code blocks, no extra text. Start directly with { and end with }.
|
| 224 |
+
|
| 225 |
+
🚨 CRITICAL DUPLICATE PREVENTION RULE 🚨
|
| 226 |
+
This is the MOST IMPORTANT rule - violating this makes your output INVALID:
|
| 227 |
+
• Each student ID (e.g., S001, S002) can appear in EXACTLY ONE group
|
| 228 |
+
• NO DUPLICATES ALLOWED - putting a student in multiple groups is a CRITICAL ERROR
|
| 229 |
+
• Before you output, you MUST verify: count how many times each student ID appears across ALL groups
|
| 230 |
+
• If ANY student ID appears more than once, your output is REJECTED
|
| 231 |
+
• If the total count of students in all groups ≠ total input students, your output is REJECTED
|
| 232 |
+
|
| 233 |
+
VALIDATION STEPS (do this before outputting):
|
| 234 |
+
1. Make a list of ALL student IDs from all groups you created
|
| 235 |
+
2. Check if any ID appears 2 or more times → if YES, remove duplicates
|
| 236 |
+
3. Count total students: sum of all group sizes must equal the TOTAL STUDENTS number
|
| 237 |
+
4. Verify each input student ID appears exactly once
|
| 238 |
+
|
| 239 |
+
You rely on the explicit "mbti_analysis" fields provided in the user prompt for your reasoning.'''
|
| 240 |
},
|
| 241 |
{
|
| 242 |
'role': 'user',
|
|
|
|
| 323 |
print(f"❌ No JSON found in response. Full content:\n{content}")
|
| 324 |
raise Exception("Invalid JSON from API - no valid JSON structure found")
|
| 325 |
|
| 326 |
+
# Failsafe: Detect and remove duplicates, then add missing students
|
| 327 |
assigned_students = set()
|
| 328 |
+
duplicate_students = set()
|
| 329 |
+
|
| 330 |
+
# First pass: detect duplicates
|
| 331 |
for group in grouping_result['groups']:
|
| 332 |
if 'students' in group:
|
| 333 |
+
for student_id in group['students']:
|
| 334 |
+
if student_id in assigned_students:
|
| 335 |
+
duplicate_students.add(student_id)
|
| 336 |
+
print(f'⚠️ DUPLICATE DETECTED: {student_id} appears in multiple groups!')
|
| 337 |
+
else:
|
| 338 |
+
assigned_students.add(student_id)
|
| 339 |
|
| 340 |
+
# Second pass: remove duplicates (keep first occurrence only)
|
| 341 |
+
if duplicate_students:
|
| 342 |
+
print(f'🔧 Removing duplicates: {duplicate_students}')
|
| 343 |
+
first_occurrence = {}
|
| 344 |
+
for i, group in enumerate(grouping_result['groups']):
|
| 345 |
+
if 'students' in group:
|
| 346 |
+
cleaned_students = []
|
| 347 |
+
for student_id in group['students']:
|
| 348 |
+
if student_id in duplicate_students:
|
| 349 |
+
if student_id not in first_occurrence:
|
| 350 |
+
# Keep first occurrence
|
| 351 |
+
first_occurrence[student_id] = i
|
| 352 |
+
cleaned_students.append(student_id)
|
| 353 |
+
else:
|
| 354 |
+
# Remove duplicate
|
| 355 |
+
print(f' Removing {student_id} from group {group["groupNumber"]}')
|
| 356 |
+
else:
|
| 357 |
+
cleaned_students.append(student_id)
|
| 358 |
+
group['students'] = cleaned_students
|
| 359 |
+
|
| 360 |
+
# Rebuild assigned_students set after cleaning
|
| 361 |
+
assigned_students = set()
|
| 362 |
+
for group in grouping_result['groups']:
|
| 363 |
+
if 'students' in group:
|
| 364 |
+
assigned_students.update(group['students'])
|
| 365 |
+
|
| 366 |
+
# Third pass: add missing students
|
| 367 |
all_ids = [s.studentNumber for s in students]
|
| 368 |
missing = [id for id in all_ids if id not in assigned_students]
|
| 369 |
|
| 370 |
if missing:
|
| 371 |
+
print(f'⚠️ AI missed students, adding to last group: {missing}')
|
| 372 |
if grouping_result['groups']:
|
| 373 |
grouping_result['groups'][-1]['students'].extend(missing)
|
| 374 |
grouping_result['groups'][-1]['reasoning'] += f" (سیستم دانشآموزان {', '.join(missing)} را به این گروه اضافه کرد)"
|
|
|
|
| 379 |
"reasoning": "گروه بازیابی شده توسط سیستم"
|
| 380 |
})
|
| 381 |
|
| 382 |
+
# Final verification
|
| 383 |
+
final_assigned = set()
|
| 384 |
+
for group in grouping_result['groups']:
|
| 385 |
+
if 'students' in group:
|
| 386 |
+
final_assigned.update(group['students'])
|
| 387 |
+
|
| 388 |
+
if len(final_assigned) != len(students):
|
| 389 |
+
print(f'❌ ERROR: Final count mismatch! Expected {len(students)}, got {len(final_assigned)}')
|
| 390 |
+
else:
|
| 391 |
+
print(f'✅ Verification passed: All {len(students)} students assigned exactly once')
|
| 392 |
+
|
| 393 |
return grouping_result
|
| 394 |
|
| 395 |
async def random_grouping(students: List[Any]) -> Dict[str, Any]:
|