Spaces:

karthikeya09
/

vedlinks-api

Sleeping

App Files Files Community

vedlinks-api / test_question_quality.py

karthikeya09

Upload test_question_quality.py with huggingface_hub

4e4897f verified 2 months ago

raw

history blame contribute delete

10.5 kB

	"""
	VedLinks AI/ML Quality Test Script

	Tests for the bug fixes below (BUG-05 has no dedicated test in this script):
	BUG-01: Difficulty-aware question selection
	BUG-02: Question deduplication
	BUG-03: Bloom's taxonomy enforcement
	BUG-04: Multi-chapter distribution
	BUG-05: Distractor quality (shuffle verification)
	BUG-06: Answer distribution balancing (A/B/C/D roughly equal)
	"""

	import sys
	import json
	from collections import Counter

	# Add project root to path
	sys.path.insert(0, '.')

	from question_paper_generator import QuestionPaperGenerator, NCERT_KNOWLEDGE, get_generator


	def test_bug01_difficulty_awareness():
	    """BUG-01: Verify that questions at different difficulty levels are different.

	    Generates five MCQs from the "Heredity" entry of NCERT_KNOWLEDGE at
	    'easy' difficulty and again at 'hard' difficulty, then compares the
	    question texts of the two runs.

	    Returns:
	        bool: True when both runs produced questions and fewer than 80% of
	        the easy question texts also appear in the hard set. False
	        otherwise, including when the Heredity chapter is unavailable.
	    """
	    print("\n" + "=" * 60)
	    print("TEST BUG-01: Difficulty-Aware Question Selection")
	    print("=" * 60)

	    gen = QuestionPaperGenerator()

	    # Use Heredity chapter (has many questions)
	    knowledge = NCERT_KNOWLEDGE.get("Heredity", {})
	    if not knowledge:
	        print(" SKIP: No Heredity chapter found")
	        return False

	    # NOTE(review): _used_questions is presumably the generator's dedup
	    # tracker; it is cleared before each run so both selections start from
	    # the same state - confirm against QuestionPaperGenerator.
	    gen._used_questions = set()
	    easy_qs = gen.generate_mcqs(knowledge, 5, 'easy')

	    gen._used_questions = set()
	    hard_qs = gen.generate_mcqs(knowledge, 5, 'hard')

	    easy_texts = {q['question'] for q in easy_qs}
	    hard_texts = {q['question'] for q in hard_qs}
	    overlap = easy_texts & hard_texts

	    # Overlap is expressed relative to the easy set; max(1, ...) prevents a
	    # ZeroDivisionError when nothing was generated.
	    overlap_pct = len(overlap) / max(1, len(easy_texts)) * 100

	    print(f" Easy questions: {len(easy_texts)}")
	    print(f" Hard questions: {len(hard_texts)}")
	    print(f" Overlap: {len(overlap)} ({overlap_pct:.0f}%)")

	    # Check Bloom's levels (informational only; not part of the verdict)
	    easy_levels = [q.get('bloomsLevel', 'L1') for q in easy_qs]
	    hard_levels = [q.get('bloomsLevel', 'L1') for q in hard_qs]
	    print(f" Easy Bloom's levels: {Counter(easy_levels)}")
	    print(f" Hard Bloom's levels: {Counter(hard_levels)}")

	    # Two empty result sets have 0% overlap, which would otherwise count as
	    # a pass even though nothing was actually compared.
	    has_questions = bool(easy_texts) and bool(hard_texts)
	    if not has_questions:
	        print(" FAIL: no questions generated for at least one difficulty level")

	    passed = has_questions and overlap_pct < 80  # At least 20% different
	    print(f" RESULT: {'PASS' if passed else 'FAIL'} (overlap < 80%: {overlap_pct:.0f}%)")
	    return passed


	def test_bug02_deduplication():
	    """BUG-02: Verify no duplicate questions within a single paper.

	    Requests a paper with two MCQ sections of five questions each on the
	    'heredity' topic and checks that no question text occurs more than once
	    across all sections.

	    Returns:
	        bool: True when the paper contains at least one question and every
	        question text is unique; False otherwise.
	    """
	    print("\n" + "=" * 60)
	    print("TEST BUG-02: Question Deduplication")
	    print("=" * 60)

	    gen = QuestionPaperGenerator()

	    config = {
	        'examType': 'Unit Test',
	        'sections': [
	            {'name': 'Section A', 'questionType': 'mcq', 'questionCount': 5, 'marksPerQuestion': 1},
	            {'name': 'Section B', 'questionType': 'mcq', 'questionCount': 5, 'marksPerQuestion': 1},
	        ],
	        'selectedTopics': ['heredity'],
	        'difficulty': {'easy': 30, 'medium': 50, 'hard': 20},
	        'includeAnswerKey': True,
	    }

	    topic_contents = {
	        'heredity': "Class: 10\nSubject: Science\nChapter: Heredity\nTopics: Genetics, Mendel's Laws"
	    }

	    paper = gen.generate_paper(config, topic_contents)

	    # Count every question text across all sections in one pass.
	    text_counts = Counter(
	        q['question']
	        for section in paper['sections']
	        for q in section['questions']
	    )
	    total_questions = sum(text_counts.values())
	    duplicates = total_questions - len(text_counts)

	    print(f" Total questions: {total_questions}")
	    print(f" Unique questions: {len(text_counts)}")
	    print(f" Duplicates: {duplicates}")

	    # Show which texts repeat so a failure can be diagnosed from the log.
	    for text, count in text_counts.items():
	        if count > 1:
	            print(f" Repeated {count}x: {text}")

	    # An empty paper trivially has zero duplicates; treat it as a failure
	    # rather than letting the check pass without examining anything.
	    if total_questions == 0:
	        print(" FAIL: paper contains no questions")

	    passed = total_questions > 0 and duplicates == 0
	    print(f" RESULT: {'PASS' if passed else 'FAIL'}")
	    return passed


	def test_bug03_blooms_taxonomy():
	    """BUG-03: Verify Bloom's taxonomy classification works.

	    Feeds a fixed list of sample questions to
	    QuestionPaperGenerator.classify_blooms_level and compares each returned
	    level with the expected one.

	    Returns:
	        bool: True when at least 70% of the samples are classified as
	        expected; False otherwise.
	    """
	    banner = "=" * 60
	    print("\n" + banner)
	    print("TEST BUG-03: Bloom's Taxonomy Enforcement")
	    print(banner)

	    # (question text, expected Bloom's level)
	    samples = [
	        ("What is photosynthesis?", "L1"),
	        ("Define osmosis.", "L1"),
	        ("Explain the process of digestion.", "L2"),
	        ("Why do plants need sunlight?", "L2"),
	        ("Calculate the ratio of offspring.", "L3"),
	        ("Draw a labeled diagram of the heart.", "L3"),
	        ("Compare mitosis and meiosis.", "L4"),
	        ("Evaluate the effectiveness of vaccination.", "L5"),
	        ("Design an experiment to test osmosis.", "L6"),
	    ]

	    classifier = QuestionPaperGenerator()
	    hits = 0

	    for prompt, wanted in samples:
	        got = classifier.classify_blooms_level(prompt)
	        if got == wanted:
	            hits += 1
	            marker = "OK"
	        else:
	            marker = "XX"
	        print(f" {marker} '{prompt[:50]}...' -> {got} (expected {wanted})")

	    accuracy = hits / len(samples) * 100
	    # Allow some flexibility: a few misclassifications are tolerated.
	    passed = accuracy >= 70
	    print(f"\n Accuracy: {accuracy:.0f}% ({hits}/{len(samples)})")
	    verdict = 'PASS' if passed else 'FAIL'
	    print(f" RESULT: {verdict}")
	    return passed


	def test_bug04_multi_chapter_distribution():
	    """BUG-04: Verify questions come from multiple chapters when selected.

	    Requests a ten-question MCQ paper for two topics and collects the
	    'chapter' value attached to each generated question.

	    Returns:
	        bool: True when the questions carry at least two distinct chapter
	        labels; False otherwise. Questions without a chapter label are
	        reported but do not count as a chapter.
	    """
	    print("\n" + "=" * 60)
	    print("TEST BUG-04: Multi-Chapter Distribution")
	    print("=" * 60)

	    gen = QuestionPaperGenerator()

	    config = {
	        'examType': 'Unit Test',
	        'sections': [
	            {'name': 'Section A', 'questionType': 'mcq', 'questionCount': 10, 'marksPerQuestion': 1},
	        ],
	        'difficulty': {'easy': 30, 'medium': 50, 'hard': 20},
	    }

	    topic_contents = {
	        'heredity': "Class: 10\nSubject: Science\nChapter: Heredity\nTopics: Genetics",
	        'evolution': "Class: 10\nSubject: Science\nChapter: Our Environment\nTopics: Ecosystem",
	    }

	    paper = gen.generate_paper(config, topic_contents)

	    chapters_used = set()
	    unlabeled = 0
	    for section in paper['sections']:
	        for q in section['questions']:
	            chapter = q.get('chapter')
	            if chapter:
	                chapters_used.add(chapter)
	            else:
	                # A missing label is not evidence of a second chapter, so it
	                # must not be counted as one (a placeholder value would let
	                # a single-chapter paper pass).
	                unlabeled += 1

	    print(f" Chapters selected: {len(topic_contents)}")
	    print(f" Chapters in paper: {len(chapters_used)} — {chapters_used}")
	    if unlabeled:
	        print(f" Questions without a chapter label: {unlabeled}")

	    passed = len(chapters_used) >= 2
	    print(f" RESULT: {'PASS' if passed else 'FAIL'}")
	    return passed


	def test_bug06_answer_distribution():
	    """BUG-06: Verify A/B/C/D answer distribution is roughly equal.

	    Requests a twelve-question MCQ paper and tallies the 'answer' value of
	    every question whose 'type' is 'mcq'.

	    Returns:
	        bool: True when at least one MCQ was generated and no single answer
	        value accounts for more than 50% of them; False otherwise.
	    """
	    print("\n" + "=" * 60)
	    print("TEST BUG-06: Answer Distribution Balance")
	    print("=" * 60)

	    gen = QuestionPaperGenerator()

	    config = {
	        'examType': 'Unit Test',
	        'sections': [
	            {'name': 'Section A', 'questionType': 'mcq', 'questionCount': 12, 'marksPerQuestion': 1},
	        ],
	        'difficulty': {'easy': 30, 'medium': 50, 'hard': 20},
	    }

	    topic_contents = {
	        'heredity': "Class: 10\nSubject: Science\nChapter: Heredity\nTopics: Genetics",
	    }

	    paper = gen.generate_paper(config, topic_contents)

	    # Tally answers of MCQ-type questions; a missing answer is counted as '?'.
	    answer_counts = Counter(
	        q.get('answer', '?')
	        for section in paper['sections']
	        for q in section['questions']
	        if q.get('type') == 'mcq'
	    )

	    total = sum(answer_counts.values())
	    target = total / 4

	    print(f" Answer distribution: {dict(answer_counts)}")
	    print(f" Total MCQs: {total}, Target per letter: {target:.1f}")

	    # Check that no letter has more than 50% of answers.
	    # default=0 keeps max() from raising ValueError when no MCQ was found.
	    max_pct = max(answer_counts.values(), default=0) / max(1, total) * 100

	    # With zero MCQs there is nothing to balance; report that as a failure
	    # instead of crashing or passing on an empty tally.
	    if total == 0:
	        print(" FAIL: paper contains no MCQ questions")

	    passed = total > 0 and max_pct <= 50  # No single letter should have >50%
	    print(f" Max concentration: {max_pct:.0f}%")
	    print(f" RESULT: {'PASS' if passed else 'FAIL'}")
	    return passed


	def test_difficulty_in_paper():
	    """Additional: Verify difficulty labels are real, not cosmetic.

	    Builds one paper with a 100% easy mix and one with a 100% hard mix for
	    the same topic, then compares the question texts of the two papers. The
	    per-question 'difficulty' labels are printed for inspection only and do
	    not influence the verdict.

	    Returns:
	        bool: True when both papers contain questions and fewer than 80% of
	        the easy paper's question texts also appear in the hard paper;
	        False otherwise.
	    """
	    print("\n" + "=" * 60)
	    print("TEST: Difficulty Labels Are Real (Not Cosmetic)")
	    print("=" * 60)

	    def make_config(difficulty_mix):
	        # Build a fresh config per paper so neither run shares mutable state
	        # with the other.
	        return {
	            'examType': 'Unit Test',
	            'sections': [
	                {'name': 'Section A', 'questionType': 'mcq', 'questionCount': 10, 'marksPerQuestion': 1},
	            ],
	            'difficulty': difficulty_mix,
	        }

	    def iter_questions(paper):
	        # Yield every question dict of a paper, section by section.
	        for section in paper['sections']:
	            yield from section['questions']

	    gen = QuestionPaperGenerator()

	    topic_contents = {
	        'heredity': "Class: 10\nSubject: Science\nChapter: Heredity\nTopics: Genetics",
	    }

	    paper_easy = gen.generate_paper(
	        make_config({'easy': 100, 'medium': 0, 'hard': 0}), topic_contents
	    )

	    # Mirror test_bug01: clear the used-question tracker between runs so the
	    # hard paper is not steered away from the easy paper's questions by
	    # leftover state, which would lower the overlap for the wrong reason.
	    # NOTE(review): generate_paper may already reset this itself - confirm.
	    gen._used_questions = set()

	    paper_hard = gen.generate_paper(
	        make_config({'easy': 0, 'medium': 0, 'hard': 100}), topic_contents
	    )

	    easy_qs = {q['question'] for q in iter_questions(paper_easy)}
	    hard_qs = {q['question'] for q in iter_questions(paper_hard)}

	    overlap = easy_qs & hard_qs
	    overlap_pct = len(overlap) / max(1, len(easy_qs)) * 100

	    print(f" 100% Easy paper questions: {len(easy_qs)}")
	    print(f" 100% Hard paper questions: {len(hard_qs)}")
	    print(f" Overlap: {len(overlap)} ({overlap_pct:.0f}%)")

	    # Difficulty labels (missing labels are tallied under '?')
	    easy_diffs = Counter(q.get('difficulty', '?') for q in iter_questions(paper_easy))
	    hard_diffs = Counter(q.get('difficulty', '?') for q in iter_questions(paper_hard))

	    print(f" Easy paper difficulty labels: {dict(easy_diffs)}")
	    print(f" Hard paper difficulty labels: {dict(hard_diffs)}")

	    # Two empty papers overlap by 0% and would otherwise pass without any
	    # question having been compared.
	    has_questions = bool(easy_qs) and bool(hard_qs)
	    if not has_questions:
	        print(" FAIL: at least one paper contains no questions")

	    passed = has_questions and overlap_pct < 80
	    print(f" RESULT: {'PASS' if passed else 'FAIL'}")
	    return passed


	if __name__ == '__main__':
	    # Script entry point: run every check in a fixed order, print a summary
	    # table, and exit with status 0 only when all checks passed.
	    divider = "=" * 60
	    print(divider)
	    print("VedLinks AI/ML Quality Test Suite")
	    print(divider)

	    # (summary label, test function) in execution order
	    suite = (
	        ('BUG-01', test_bug01_difficulty_awareness),
	        ('BUG-02', test_bug02_deduplication),
	        ('BUG-03', test_bug03_blooms_taxonomy),
	        ('BUG-04', test_bug04_multi_chapter_distribution),
	        ('BUG-06', test_bug06_answer_distribution),
	        ('DIFF_REAL', test_difficulty_in_paper),
	    )
	    results = {label: run_test() for label, run_test in suite}

	    print("\n" + divider)
	    print("SUMMARY")
	    print(divider)
	    for label, outcome in results.items():
	        verdict = 'PASS' if outcome else 'FAIL'
	        print(f" {label}: {verdict}")

	    n_total = len(results)
	    n_passed = len([outcome for outcome in results.values() if outcome])
	    print(f"\n {n_passed}/{n_total} tests passed")

	    # Non-zero exit status signals failure to the calling shell / CI job.
	    sys.exit(1 if n_passed != n_total else 0)