Spaces:
Sleeping
Sleeping
| """ | |
| VedLinks AI/ML Quality Test Script | |
| Tests 5 of the 6 bug fixes listed below (BUG-05 has no dedicated test in this script), plus a difficulty-label check: | |
| BUG-01: Difficulty-aware question selection | |
| BUG-02: Question deduplication | |
| BUG-03: Bloom's taxonomy enforcement | |
| BUG-04: Multi-chapter distribution | |
| BUG-05: Distractor quality (shuffle verification) | |
| BUG-06: Answer distribution balancing (A/B/C/D roughly equal) | |
| """ | |
| import sys | |
| import json | |
| from collections import Counter | |
| # Add project root to path | |
| sys.path.insert(0, '.') | |
| from question_paper_generator import QuestionPaperGenerator, NCERT_KNOWLEDGE, get_generator | |
def test_bug01_difficulty_awareness():
    """BUG-01: Verify that questions at different difficulty levels are different.

    Generates five 'easy' and five 'hard' MCQs from the "Heredity" entry of
    NCERT_KNOWLEDGE and compares the question texts of the two runs.

    Returns:
        bool: True when both runs produced at least one question and fewer
        than 80% of the easy question texts also appear in the hard set.
        False otherwise, including when the Heredity chapter is missing.
    """
    print("\n" + "=" * 60)
    print("TEST BUG-01: Difficulty-Aware Question Selection")
    print("=" * 60)

    gen = QuestionPaperGenerator()

    # Use Heredity chapter (has many questions)
    knowledge = NCERT_KNOWLEDGE.get("Heredity", {})
    if not knowledge:
        print(" SKIP: No Heredity chapter found")
        return False

    # Clear the generator's used-question set before each run so the two
    # runs are independent of each other.
    # NOTE(review): presumably this set drives deduplication - confirm
    # against QuestionPaperGenerator.
    gen._used_questions = set()
    easy_qs = gen.generate_mcqs(knowledge, 5, 'easy')
    gen._used_questions = set()
    hard_qs = gen.generate_mcqs(knowledge, 5, 'hard')

    easy_texts = {q['question'] for q in easy_qs}
    hard_texts = {q['question'] for q in hard_qs}

    overlap = easy_texts & hard_texts
    # max(1, ...) keeps the division safe when no easy questions came back.
    overlap_pct = len(overlap) / max(1, len(easy_texts)) * 100

    print(f" Easy questions: {len(easy_texts)}")
    print(f" Hard questions: {len(hard_texts)}")
    print(f" Overlap: {len(overlap)} ({overlap_pct:.0f}%)")

    # Check Bloom's levels (informational only; not part of the verdict)
    easy_levels = [q.get('bloomsLevel', 'L1') for q in easy_qs]
    hard_levels = [q.get('bloomsLevel', 'L1') for q in hard_qs]
    print(f" Easy Bloom's levels: {Counter(easy_levels)}")
    print(f" Hard Bloom's levels: {Counter(hard_levels)}")

    # An empty run would give 0% overlap and pass vacuously, so both runs
    # must actually have produced questions before the overlap is trusted.
    has_questions = bool(easy_texts) and bool(hard_texts)
    if not has_questions:
        print(" No questions generated for at least one difficulty level")

    passed = has_questions and overlap_pct < 80  # At least 20% different
    print(f" RESULT: {'PASS' if passed else 'FAIL'} (overlap < 80%: {overlap_pct:.0f}%)")
    return passed
def test_bug02_deduplication():
    """BUG-02: Verify no duplicate questions within a single paper.

    Builds a paper with two five-question MCQ sections on the same topic and
    checks that no question text appears more than once across all sections.

    Returns:
        bool: True when every question text in the paper is unique.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("TEST BUG-02: Question Deduplication")
    print(rule)

    generator = QuestionPaperGenerator()

    # Two identically shaped sections drawing on the same topic.
    section_specs = [
        {'name': title, 'questionType': 'mcq', 'questionCount': 5, 'marksPerQuestion': 1}
        for title in ('Section A', 'Section B')
    ]
    config = {
        'examType': 'Unit Test',
        'sections': section_specs,
        'selectedTopics': ['heredity'],
        'difficulty': {'easy': 30, 'medium': 50, 'hard': 20},
        'includeAnswerKey': True,
    }
    topic_contents = {
        'heredity': "Class: 10\nSubject: Science\nChapter: Heredity\nTopics: Genetics, Mendel's Laws"
    }

    paper = generator.generate_paper(config, topic_contents)

    # Flatten every section's question text into one list.
    texts = [
        item['question']
        for part in paper['sections']
        for item in part['questions']
    ]
    distinct = set(texts)
    duplicates = len(texts) - len(distinct)

    print(f" Total questions: {len(texts)}")
    print(f" Unique questions: {len(distinct)}")
    print(f" Duplicates: {duplicates}")

    passed = duplicates == 0
    verdict = 'PASS' if passed else 'FAIL'
    print(f" RESULT: {verdict}")
    return passed
def test_bug03_blooms_taxonomy():
    """BUG-03: Verify Bloom's taxonomy classification works.

    Runs a fixed list of sample questions through
    ``QuestionPaperGenerator.classify_blooms_level`` and compares each result
    with its expected level label.

    Returns:
        bool: True when at least 70% of the samples are classified as expected.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("TEST BUG-03: Bloom's Taxonomy Enforcement")
    print(rule)

    # (question text, expected level) pairs covering L1 through L6.
    samples = [
        ("What is photosynthesis?", "L1"),
        ("Define osmosis.", "L1"),
        ("Explain the process of digestion.", "L2"),
        ("Why do plants need sunlight?", "L2"),
        ("Calculate the ratio of offspring.", "L3"),
        ("Draw a labeled diagram of the heart.", "L3"),
        ("Compare mitosis and meiosis.", "L4"),
        ("Evaluate the effectiveness of vaccination.", "L5"),
        ("Design an experiment to test osmosis.", "L6"),
    ]

    classifier = QuestionPaperGenerator()
    hits = 0
    for prompt, wanted in samples:
        got = classifier.classify_blooms_level(prompt)
        if got == wanted:
            hits += 1
            marker = "OK"
        else:
            marker = "XX"
        print(f" {marker} '{prompt[:50]}...' -> {got} (expected {wanted})")

    accuracy = hits / len(samples) * 100
    passed = accuracy >= 70  # Allow some flexibility
    print(f"\n Accuracy: {accuracy:.0f}% ({hits}/{len(samples)})")
    verdict = 'PASS' if passed else 'FAIL'
    print(f" RESULT: {verdict}")
    return passed
def test_bug04_multi_chapter_distribution():
    """BUG-04: Verify questions come from multiple chapters when selected.

    Generates a ten-question MCQ paper from two topic entries and collects
    the 'chapter' value attached to each generated question.

    Returns:
        bool: True when the paper's questions carry at least two distinct
        chapter values, not counting the 'Unknown' placeholder used for
        questions without a 'chapter' key.
    """
    print("\n" + "=" * 60)
    print("TEST BUG-04: Multi-Chapter Distribution")
    print("=" * 60)

    gen = QuestionPaperGenerator()
    config = {
        'examType': 'Unit Test',
        'sections': [
            {'name': 'Section A', 'questionType': 'mcq', 'questionCount': 10, 'marksPerQuestion': 1},
        ],
        'difficulty': {'easy': 30, 'medium': 50, 'hard': 20},
    }
    # NOTE(review): the 'evolution' key carries "Our Environment" content;
    # left as-is because the generator may depend on these exact strings.
    topic_contents = {
        'heredity': "Class: 10\nSubject: Science\nChapter: Heredity\nTopics: Genetics",
        'evolution': "Class: 10\nSubject: Science\nChapter: Our Environment\nTopics: Ecosystem",
    }

    paper = gen.generate_paper(config, topic_contents)

    chapters_used = {
        q.get('chapter', 'Unknown')
        for section in paper['sections']
        for q in section['questions']
    }
    # 'Unknown' only marks questions with no chapter tag. Counting it would
    # let one real chapter plus some untagged questions satisfy the check.
    real_chapters = chapters_used - {'Unknown'}

    print(f" Chapters selected: {len(topic_contents)}")
    print(f" Chapters in paper: {len(real_chapters)} — {chapters_used}")

    passed = len(real_chapters) >= 2
    print(f" RESULT: {'PASS' if passed else 'FAIL'}")
    return passed
def test_bug06_answer_distribution():
    """BUG-06: Verify A/B/C/D answer distribution is roughly equal.

    Generates a twelve-question MCQ paper and tallies the 'answer' value of
    every question whose 'type' is 'mcq'. The check itself is lenient: it
    only requires that no single answer value accounts for more than 50% of
    the MCQs.

    Returns:
        bool: True when at least one MCQ was found and the most common
        answer covers at most 50% of them; False otherwise.
    """
    print("\n" + "=" * 60)
    print("TEST BUG-06: Answer Distribution Balance")
    print("=" * 60)

    gen = QuestionPaperGenerator()
    config = {
        'examType': 'Unit Test',
        'sections': [
            {'name': 'Section A', 'questionType': 'mcq', 'questionCount': 12, 'marksPerQuestion': 1},
        ],
        'difficulty': {'easy': 30, 'medium': 50, 'hard': 20},
    }
    topic_contents = {
        'heredity': "Class: 10\nSubject: Science\nChapter: Heredity\nTopics: Genetics",
    }

    paper = gen.generate_paper(config, topic_contents)

    # Questions without an 'answer' key are tallied under '?'.
    answer_counts = Counter(
        q.get('answer', '?')
        for section in paper['sections']
        for q in section['questions']
        if q.get('type') == 'mcq'
    )

    total = sum(answer_counts.values())
    target = total / 4

    print(f" Answer distribution: {dict(answer_counts)}")
    print(f" Total MCQs: {total}, Target per letter: {target:.1f}")

    # With no MCQs, max() over the empty tally would raise ValueError.
    # Report a failure instead of crashing the whole suite.
    if total == 0:
        print(" No MCQ questions found in the generated paper")
        print(" RESULT: FAIL")
        return False

    # Check that no letter has more than 50% of answers
    max_pct = max(answer_counts.values()) / total * 100
    passed = max_pct <= 50  # No single letter should have >50%

    print(f" Max concentration: {max_pct:.0f}%")
    print(f" RESULT: {'PASS' if passed else 'FAIL'}")
    return passed
def test_difficulty_in_paper():
    """Additional: Verify difficulty labels are real, not cosmetic.

    Generates one paper configured as 100% easy and one as 100% hard from
    the same topic, then compares their question texts and prints the
    'difficulty' labels found in each paper.

    Returns:
        bool: True when both papers contain questions and fewer than 80% of
        the easy paper's question texts also appear in the hard paper.
    """
    print("\n" + "=" * 60)
    print("TEST: Difficulty Labels Are Real (Not Cosmetic)")
    print("=" * 60)

    base_config = {
        'examType': 'Unit Test',
        'sections': [
            {'name': 'Section A', 'questionType': 'mcq', 'questionCount': 10, 'marksPerQuestion': 1},
        ],
    }
    # Separate config dicts per paper instead of mutating one in place.
    easy_config = {**base_config, 'difficulty': {'easy': 100, 'medium': 0, 'hard': 0}}
    hard_config = {**base_config, 'difficulty': {'easy': 0, 'medium': 0, 'hard': 100}}
    topic_contents = {
        'heredity': "Class: 10\nSubject: Science\nChapter: Heredity\nTopics: Genetics",
    }

    # A fresh generator per paper keeps the two runs independent.
    # test_bug01 has to clear gen._used_questions between runs, which
    # suggests a reused instance may carry state that would exclude the easy
    # paper's questions from the hard paper and force the overlap to zero.
    # NOTE(review): confirm whether generate_paper resets that state itself.
    paper_easy = QuestionPaperGenerator().generate_paper(easy_config, topic_contents)
    paper_hard = QuestionPaperGenerator().generate_paper(hard_config, topic_contents)

    easy_items = [q for section in paper_easy['sections'] for q in section['questions']]
    hard_items = [q for section in paper_hard['sections'] for q in section['questions']]

    easy_qs = {q['question'] for q in easy_items}
    hard_qs = {q['question'] for q in hard_items}

    overlap = easy_qs & hard_qs
    # max(1, ...) keeps the division safe when the easy paper is empty.
    overlap_pct = len(overlap) / max(1, len(easy_qs)) * 100

    print(f" 100% Easy paper questions: {len(easy_qs)}")
    print(f" 100% Hard paper questions: {len(hard_qs)}")
    print(f" Overlap: {len(overlap)} ({overlap_pct:.0f}%)")

    # Difficulty labels (informational only; not part of the verdict)
    easy_diffs = Counter(q.get('difficulty', '?') for q in easy_items)
    hard_diffs = Counter(q.get('difficulty', '?') for q in hard_items)
    print(f" Easy paper difficulty labels: {dict(easy_diffs)}")
    print(f" Hard paper difficulty labels: {dict(hard_diffs)}")

    # Empty papers would give 0% overlap and pass vacuously, so require
    # both papers to contain questions.
    has_questions = bool(easy_qs) and bool(hard_qs)
    if not has_questions:
        print(" No questions generated for at least one paper")

    passed = has_questions and overlap_pct < 80
    print(f" RESULT: {'PASS' if passed else 'FAIL'}")
    return passed
if __name__ == '__main__':
    # Script entry point: run every check in order, print a summary, and
    # exit with status 0 only when all checks passed.
    rule = "=" * 60
    print(rule)
    print("VedLinks AI/ML Quality Test Suite")
    print(rule)

    # (summary label, check function) pairs, executed top to bottom.
    suite = (
        ('BUG-01', test_bug01_difficulty_awareness),
        ('BUG-02', test_bug02_deduplication),
        ('BUG-03', test_bug03_blooms_taxonomy),
        ('BUG-04', test_bug04_multi_chapter_distribution),
        ('BUG-06', test_bug06_answer_distribution),
        ('DIFF_REAL', test_difficulty_in_paper),
    )
    outcomes = {label: check() for label, check in suite}

    print("\n" + rule)
    print("SUMMARY")
    print(rule)
    for label, ok in outcomes.items():
        verdict = 'PASS' if ok else 'FAIL'
        print(f" {label}: {verdict}")

    n_total = len(outcomes)
    n_passed = len([ok for ok in outcomes.values() if ok])
    print(f"\n {n_passed}/{n_total} tests passed")
    exit_code = 0 if n_passed == n_total else 1
    sys.exit(exit_code)